diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e3603fe277c8f35347e530ec68b724b55ac35444 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,55 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model-00016-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00051-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00045-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00013-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00010-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00015-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00041-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00047-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00021-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text +model-00046-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00032-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00042-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00025-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00024-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00037-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00049-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00023-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00033-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00040-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00027-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00009-of-000051.safetensors filter=lfs diff=lfs merge=lfs 
-text +model-00031-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00036-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00018-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00019-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00034-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00022-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00026-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00017-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00020-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00039-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00038-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00029-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00012-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00035-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00006-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00044-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00011-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00005-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00014-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00007-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00043-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00048-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00008-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00030-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text +model-00050-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text 
+model-00028-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..386bd0a09ea862ac149244862dd5464abe7360fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,34 @@ +OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 + +This OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 (the "Agreement") is a legal agreement between You and Huawei Technologies Co., Ltd. ("Huawei", "We" or "Us"), and it governs Your reproduction, use, modification, and distribution of openPangu as made available by Huawei under this Agreement. + +By using, reproducing, modifying, distributing, performing or displaying any portion or element of openPangu, or otherwise accepting the terms of this Agreement, You agree to be bound by this Agreement. + +1. Definitions. +1.1. “openPangu” or “Model” means openPangu large language models and software, including trained model weights, parameters (including optimizer states), accompanying source code and scripts released under this Agreement. +1.2. “Derivative Model” means all (1) modifications to the Model, (2) works based on the Model, and (3) any other derivative works of the Model. For clarity, information or content resulting from operating or otherwise using the Model is not a Derivative Model. +1.3. “You” or “Your” means an individual or Legal Entity exercising permissions granted by this Agreement and/or using the Model for any purpose. +1.4. “Third Party” or “Third Parties” means individuals or legal entities that are not under common control with Us or You. + +2. License Grant. Subject to Your full compliance with the terms and conditions of this Agreement, We hereby grant to You a perpetual, worldwide, non-exclusive, non-transferable, no-charge, royalty-free license (except as stated in Section 3) to use, reproduce, modify, and distribute the Model. + +3. Conditions for License Grant. 
You represent and warrant that You will not access, download, install, run, deploy, integrate, modify, or otherwise use the Model, directly or indirectly, within the European Union. + + +4. Redistribution. +4.1. If You distribute the Model or Derivative Model, You shall retain in Your distribution (1) a copy of this Agreement, and (2) all copyright notices and other notices of origin included in the Model that are applicable to Your distribution. +4.2. Further, if You distribute or make available to Third Parties a product or service (including another AI model) based on the Model, You are required to (1) display the acknowledgement “Powered by openPangu” and (2) include a trademark notice “openPangu is a trademark of Huawei Technologies Co., Ltd.” on related webpages, user manuals, product documentation or other advertising materials mentioning features of the Model. +4.3. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for Derivative Model made by You as a whole, provided Your use, reproduction, and distribution of the Model otherwise complies with the terms and conditions of this Agreement. + +5. Ownership. We do not claim ownership to any information or content generated using the Model or Derivative Model that are made by You. You are solely responsible for evaluating the accuracy and appropriateness of such information or content for Your use case. + +6. Trademarks. This Agreement does not grant permission to use the trade names, trademarks, service marks, or product names of Huawei, except as required for complying with Section 4.2. + +7. Indemnity. You will indemnify and hold harmless Huawei from and against any claim by any third party arising out of or related to Your use or distribution of the Model or Derivative Model made by You (e.g. a violation against Section 3). 
For the avoidance of doubt, “third party” in this clause includes supervisory authorities. + +8. THE MODEL IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NONINFRINGEMENT, ACCURACY, OR THE ABSENCE OF LATENT OR OTHER DEFECTS OR ERRORS, WHETHER OR NOT DISCOVERABLE, ALL TO THE GREATEST EXTENT PERMISSIBLE UNDER APPLICABLE LAW. + +9. IN NO EVENT SHALL WE BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE MODEL, IN WHOLE OR IN PART, NO MATTER HOW IT’S CAUSED OR THE LEGAL THEORY IT IS BASED ON, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + +END OF THE TERMS AND CONDITIONS diff --git a/OPEN SOURCE SOFTWARE NOTICE b/OPEN SOURCE SOFTWARE NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..87473f23483a276c53ff5741064c320f0b79d04e --- /dev/null +++ b/OPEN SOURCE SOFTWARE NOTICE @@ -0,0 +1,635 @@ +OPEN SOURCE SOFTWARE NOTICE + +Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail over all other license information with regard to the respective open source software contained in the product, including but not limited to any End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co., Ltd. and any of its local subsidiaries which may have provided this product to you in your local country. + +Warranty Disclaimer +THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. 
+ +Copyright Notice and License Texts + +Software: transformers 4.48.2 +Copyright notice: +Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. + +License Text: +---------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Software: vllm 0.9.1 +Copyright notice: +Copyright 2025 The vLLM team. + +License Text: +---------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Software: vllm-ascend 0.9.1 +Copyright notice: +Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. + +License Text: +---------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index 7891eec530a67a914f6cd982dfbeb710b7c8a658..3302700dc9d9123c71eaaea80a7bedac8f091ed0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,85 @@ ---- -license: other -license_name: openpangu-model-license-agreement-version-1.0 -license_link: https://ai.gitcode.com/ascend-tribe/openPangu-R-72B-2512/blob/main/LICENSE ---- +# openPangu-R-72B-2512 +中文 | [English](README_EN.md) + +## 1. 简介 +openPangu-R-72B-2512 是基于昇腾集群训练的MoE模型。模型总参数量74B,激活参数量15B,采用80选8的专家选择机制,支持128k长序列处理。训练数据总量约24T tokens。同一个模型支持快慢思考两种模式切换,慢思考模式下,支持思维链分档(“低”和“高”两种推理深度)。 + +## 2. 
模型架构 +openPangu-R-72B-2512 在模型稳定收敛和效果提升方向进行了以下优化: +- 在注意力机制中引入参数式Sink Token技术: 有效缓解极大激活值问题,训练中最大激活值从$10^3$降至$10^2$量级, 提升训练稳定性并对后量化亲和。 + +- K-Norm与Depth-Scaled Sandwich-Norm:为保证attention logits的稳定性,我们采用了K-Norm结构。K-Norm与QK-Norm类似,但只对attention的key施加RMS Norm。K-Norm可起到与QK-Norm类似的稳定性效果,但引入的计算开销更小,同时K-Norm不影响Query的scale,带来更灵活的表达能力。为了保证残差连接的稳定性,我们采用了Depth-Scaled Sandwich-Norm方法。 + +- 注意力架构优化:增加Query头数和注意力头维度,使模型能够从更多角度捕获细粒度语义关系。引入Partial RoPE机制,仅对Query和Key中1/3维度应用位置编码。尽管Key头维度有所增加,但通过将KV组数量减半,KV cache仍可减少37.5%,在保持推理阶段显存和速度优化的同时,实现了更低的训练损失和更优的推理性能。 + +- Adaptive Aux Free负载优化技术:能够自适应调整专家bias更新幅度,减少均衡震荡现象,优化专家负载分布均衡性。 + + +详细架构参数如下: + +| | | +|:---:|:---:| +| **Architecture** | Mixture-of-Experts (MoE) | +| **Total Parameters** | 74B | +| **Activated Parameters** | 15B | +| **Number of Layers** (Dense layer included) | 50 | +| **Number of Dense Layers** | 4 | +| **Number of MTP Modules** | 1 | +| **Hidden Dimension** | 4608 | +| **MoE Hidden Dimension** (per Expert) | 1280 | +| **Attention Mechanism** | GQA | +| **Number of Attention Heads** | 64 | +| **Number of Query Groups** | 4 | +| **Number of Experts** | 80 | +| **Selected Experts per Token** | 8 | +| **Number of Shared Experts** | 2 | +| **Vocabulary Size** | 153K | +| **Context Length** | 128K | + + +## 3. 
测评结果 + +| 测评集 | 测评指标 | openPangu-R-72B-2512 快思考 | openPangu-R-72B-2512 慢思考 | +|:------------------:|:----------------------------:|:-----:|:-----:| +| **通用能力** | | | +| LiveBench | Acc (2024-11-25) | 67.3 | 75.2 | +| MMLU-Pro | Exact Match | 84.2 | 84.8 | +| MMLU-ProX | Acc | 76.9 | 80.6 | +| RULER | Acc | 95.6 | 94.7 | +| LongBench V2 | Acc |45.3 |55.3 | +| IF-Eval | Prompt Strict | 86.3 | 79.1 | +| Hallucination-LeaderBoard | 1-HHEM | 96.5 | 97.1 | +| GPQA-Diamond | Avg@4 | 76.8 | 83.2 | +| SuperGPQA | Acc | 58.9 | 64.2 | +| **数学能力** | | | +| AIME24 | Avg@16 | 75.6 | 89.0 | +| AIME25 | Avg@16 | 60.6 | 81.3 | +| CNMO 2024 | Avg@32 | 77.8 | 82.8 | +| HMMT 2025 | Avg@16 (February) | 45.4 | 74.8 | +| **代码能力** | | | +| LiveCodeBench V6 | Avg@3 (01/25~05/25) | 41.9 | 69.5 | +| Codeforces | Elo Avg@3 (02/25~09/25) | 1044.5 | 1701.4 | +| **Agent工具调用** | | | +| BFCL-V3 | Acc (Prompt) | 74.6 | 76.5 | +| Tau-Bench (airline) | Avg@3 (FC) | 45.3 | 56.0 | +| Tau-Bench (retail) | Avg@3 (FC) | 70.1 | 73.0 | +| Tau2-Bench (airline) | Avg@3 (FC) | 58.0 | 65.3 | +| Tau2-Bench (retail) | Avg@3 (FC) | 71.4 | 78.7 | +| Tau2-Bench (telecom) | Avg@3 (FC) | 48.8 | 49.4 | +| AceBench | Acc (Prompt) | 74.3 | 79.6 | + + +## 4. 部署和使用 +- 使用omni-infer推理框架,参考[[omniinfer_for_openpangu_r_72b_2512](doc/omniinfer_for_openpangu_r_72b_2512.md)] + +## 5. 模型许可证 +除文件中对开源许可证另有约定外,openPangu-R-72B-2512 模型根据 OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 授权,旨在允许使用并促进人工智能技术的进一步发展。有关详细信息,请参阅模型存储库根目录中的 [LICENSE](LICENSE) 文件。 + +## 6. 免责声明 +由于 openPangu-R-72B-2512 (“模型”)所依赖的技术固有的限制,以及人工智能生成的内容是由盘古自动生成的,华为无法对以下事项做出任何保证: +- 该模型的输出通过AI算法自动生成,不能排除某些信息可能存在缺陷、不合理或引起不适的可能性,生成的内容不代表华为的态度或立场; +- 无法保证该模型100%准确、可靠、功能齐全、及时、安全、无错误、不间断、持续稳定或无任何故障; +- 该模型的输出内容不构成任何建议或决策,也不保证生成的内容的真实性、完整性、准确性、及时性、合法性、功能性或实用性。生成的内容不能替代医疗、法律等领域的专业人士回答您的问题。生成的内容仅供参考,不代表华为的任何态度、立场或观点。您需要根据实际情况做出独立判断,华为不承担任何责任。 + +## 7. 
反馈 +如果有任何意见和建议,请提交issue或联系[openPangu@huawei.com](mailto:openPangu@huawei.com)。 diff --git a/README_EN.md b/README_EN.md new file mode 100644 index 0000000000000000000000000000000000000000..4ee1db335104a8618875990eb6ab6ac20838aea2 --- /dev/null +++ b/README_EN.md @@ -0,0 +1,85 @@ +# openPangu-R-72B-2512 +[中文](README.md) | English + +## 1. Introduction +openPangu-R-72B-2512 is an MoE model trained on Ascend. The model has 74B total parameters and 15B activated parameters. It selects top 8 experts out of 80 routed experts. Its context length is 128k. The total pretraining data contains 24T tokens. It supports switching between two modes (fast-thinking and slow-thinking). In slow-thinking mode, we support two types of reasoning effort ('low' and 'high'). + +## 2. Architecture +openPangu-R-72B-2512 includes several enhancements: +- Parametric sink token: Effectively mitigates the problem of extremely large activation values, reducing the maximum activation value from the order of $10^3$ to $10^2$ during training, which improves training stability and enhances compatibility with post-quantization. + +- K-Norm and Depth-Scaled Sandwich-Norm: To ensure the stability of attention logits, we apply K-Norm, a structure analogous to QK-Norm that applies RMS Norm solely to the attention keys. This approach achieves stability effects comparable to QK-Norm while introducing less computational overhead. Moreover, by preserving the original scale of Query, K-Norm offers greater expressive flexibility. To maintain the stability of residual connections, we employ the Depth-Scaled Sandwich-Norm. + +- Attention design: We increase Query heads and attention head dimensions to enable the model to capture fine-grained semantic relationships from multiple perspectives. The Partial RoPE mechanism applies positional encoding to only 1/3 of the dimensions in Query and Key. 
Although the Key head dimension increases, halving the number of KV groups still reduces KV cache by 37.5%, achieving lower training loss and improved inference performance while maintaining memory and speed optimizations during the inference stage. + +- Adaptive Aux-Free Load Balancing Strategy: This approach adaptively adjusts the update magnitude of expert bias, mitigates balancing oscillations, and optimizes the equilibrium of expert load distribution. + +Hyperparameters related to model architecture are as follows: + +| | | +|:---:|:---:| +| **Architecture** | Mixture-of-Experts (MoE) | +| **Total Parameters** | 74B | +| **Activated Parameters** | 15B | +| **Number of Layers** (Dense layer included) | 50 | +| **Number of Dense Layers** | 4 | +| **Number of MTP Modules** | 1 | +| **Hidden Dimension** | 4608 | +| **MoE Hidden Dimension** (per Expert) | 1280 | +| **Attention Mechanism** | GQA | +| **Number of Attention Heads** | 64 | +| **Number of Query Groups** | 4 | +| **Number of Experts** | 80 | +| **Selected Experts per Token** | 8 | +| **Number of Shared Experts** | 2 | +| **Vocabulary Size** | 153K | +| **Context Length** | 128K | + +## 3. 
Results +| Benchmark | Metric | openPangu-R-72B-2512 Fast-thinking | openPangu-R-72B-2512 Slow-thinking | +|:------------------:|:----------------------------:|:-----:|:-----:| +| **General** | | | +| LiveBench | Acc (2024-11-25) | 67.3 | 75.2 | +| MMLU-Pro | Exact Match | 84.2 | 84.8 | +| MMLU-ProX | Acc | 76.9 | 80.6 | +| RULER | Acc | 95.6 | 94.7 | +| LongBench V2 | Acc |45.3 |55.3 | +| IF-Eval | Prompt Strict | 86.3 | 79.1 | +| Hallucination-LeaderBoard | 1-HHEM | 96.5 | 97.1 | +| GPQA-Diamond | Avg@4 | 76.8 | 83.2 | +| SuperGPQA | Acc | 58.9 | 64.2 | +| **Math** | | | +| AIME24 | Avg@16 | 75.6 | 89.0 | +| AIME25 | Avg@16 | 60.6 | 81.3 | +| CNMO 2024 | Avg@32 | 77.8 | 82.8 | +| HMMT 2025 | Avg@16 (February) | 45.4 | 74.8 | +| **Coding** | | | +| LiveCodeBench V6 | Avg@3 (01/25~05/25) | 41.9 | 69.5 | +| Codeforces | Elo Avg@3 (02/25~09/25) | 1044.5 | 1701.4 | +| **Agentic Tool Use** | | | +| BFCL-V3 | Acc (Prompt) | 74.6 | 76.5 | +| Tau-Bench (airline) | Avg@3 (FC) | 45.3 | 56.0 | +| Tau-Bench (retail) | Avg@3 (FC) | 70.1 | 73.0 | +| Tau2-Bench (airline) | Avg@3 (FC) | 58.0 | 65.3 | +| Tau2-Bench (retail) | Avg@3 (FC) | 71.4 | 78.7 | +| Tau2-Bench (telecom) | Avg@3 (FC) | 48.8 | 49.4 | +| AceBench | Acc (Prompt) | 74.3 | 79.6 | + +## 4. Deployment +- omni-infer: please refer to [[omniinfer_for_openpangu_r_72b_2512](doc/omniinfer_for_openpangu_r_72b_2512_EN.md)] + +## 5. Model License +Unless otherwise noted, the openPangu-R-72B-2512 model is licensed under the terms and conditions of OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0, which is intended to be used permissively and enable the further development of artificial intelligence technologies. Please refer to the [LICENSE](LICENSE) file located in the root directory of the model repository for details. + +## 6. 
Disclaimer +Due to the technical limitations inherent in the technology on which the openPangu-R-72B-2512 model (“Model”) relies and the fact that the artificial intelligence generated content is automatically produced by Model, Huawei cannot make any guarantees regarding the following matters: + +- The output of this Model is automatically generated via AI algorithms, it does not rule out the possibility that some of the information may be flawed, unreasonable, or cause discomfort, and the generated content does not represent Huawei's attitude or standpoint; +- There is no guarantee that this Model is 100% accurate, reliable, functional, timely, secure, safe, error-free, uninterrupted, continuously stable, or free of any faults; +- The output of this Model does not constitute any advice or decisions for you, and it does not guarantee the authenticity, completeness, accuracy, timeliness, legality, functionality, or practicality of the generated content. The generated content cannot replace professionals in medical, legal, and other fields in answering your questions. The generated content is for your reference only and does not represent any attitude, standpoint, or position of Huawei. You need to make independent judgments based on your actual situation, and Huawei does not assume any responsibilities. + +## 7. Contact +If you have any questions, please raise an issue or contact us at [openPangu@huawei.com](mailto:openPangu@huawei.com). 
+ + + diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..80966a8db7cafdfe3a17b1408f4076207ba9e9a1 --- /dev/null +++ b/config.json @@ -0,0 +1,45 @@ +{ + "architectures": [ + "PanguProMoEV2ForCausalLM" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_pangu_moe.PanguProMoEConfig", + "AutoModel": "modeling_pangu_moe.PanguProMoEModel", + "AutoModelForCausalLM": "modeling_pangu_moe.PanguProMoEForCausalLM" + }, + "bos_token_id": 1, + "eos_token_id": 45892, + "first_k_dense_replace": 4, + "hidden_act": "silu", + "hidden_size": 4608, + "initializer_range": 0.02, + "intermediate_size": 10240, + "max_position_embeddings": 4096, + "model_type": "PanguProMoE", + "moe_intermediate_size": 1280, + "n_routed_experts": 80, + "n_shared_experts": 2, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 50, + "num_key_value_heads": 4, + "num_nextn_predict_layers": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "routed_scaling_factor": 2.5, + "router_enable_expert_bias": true, + "sandwich_norm": true, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.48.2", + "use_cache": true, + "vocab_size": 153600, + "qk_nope_dim": 128, + "qk_rope_dim": 64, + "v_channels": 128, + "param_sink_number": 128, + "param_sink_with_value": true +} diff --git a/configuration_pangu_moe.py b/configuration_pangu_moe.py new file mode 100644 index 0000000000000000000000000000000000000000..e1c2be8fd9edfa15067d7a4c83e0cac4d183b31b --- /dev/null +++ b/configuration_pangu_moe.py @@ -0,0 +1,96 @@ +# coding=utf-8 +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" PanguProMoE model configuration""" + + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + + +class PanguProMoEConfig(PretrainedConfig): + + model_type = "PanguProMoE" + _auto_class = "AutoConfig" + + def __init__( + self, + vocab_size=153376, + hidden_size=4608, + intermediate_size=10240, + num_hidden_layers=50, + num_attention_heads=64, + num_key_value_heads=4, + mlp_only_layers=[0,1,2,3], + hidden_act="silu", + max_position_embeddings=8192, + initializer_range=0.02, + rms_norm_eps=1e-5, + use_cache=True, + tie_word_embeddings=False, + rope_theta=100000, + moe_intermediate_size=1280, + shared_expert_intermediate_size=2560, + num_experts_per_tok=8, + num_experts=80, + norm_topk_prob=True, + router_enable_expert_bias=True, + output_router_logits=False, + routed_scaling_factor=2.5, + qk_nope_dim = 128, + qk_rope_dim = 64, + v_channels = 128, + sandwich_norm=True, + param_sink_number = 128, + param_sink_with_value=True, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.use_cache = 
use_cache + self.rope_theta = rope_theta + self.mlp_only_layers = mlp_only_layers + self.intermediate_size = intermediate_size + + # MoE arguments + self.moe_intermediate_size = moe_intermediate_size + self.shared_expert_intermediate_size = shared_expert_intermediate_size + self.num_experts_per_tok = num_experts_per_tok + self.num_experts = num_experts + self.norm_topk_prob = norm_topk_prob + self.output_router_logits = output_router_logits + self.router_enable_expert_bias = router_enable_expert_bias + self.routed_scaling_factor = routed_scaling_factor + self.qk_nope_dim = qk_nope_dim + self.qk_rope_dim = qk_rope_dim + self.v_channels = v_channels + self.sandwich_norm = sandwich_norm + self.param_sink_number = param_sink_number + self.param_sink_with_value = param_sink_with_value + + super().__init__( + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) diff --git a/doc/omniinfer_for_openpangu_r_72b_2512.md b/doc/omniinfer_for_openpangu_r_72b_2512.md new file mode 100644 index 0000000000000000000000000000000000000000..b8383b02a12395e12efef17a5da8844454f89260 --- /dev/null +++ b/doc/omniinfer_for_openpangu_r_72b_2512.md @@ -0,0 +1,118 @@ +# openPangu-R-72B-2512在Omni-Infer部署指导文档 + +## 硬件环境和部署方式 +PD混部,只需要1台Atlas 800T A3机器中的4个die。 + +## 代码和镜像 +- Omni-Infer代码版本:release_v0.7.0 +- 配套镜像:参考 https://gitee.com/omniai/omniinfer/releases 中v0.7.0镜像,以A3硬件和arm架构为例,使用“docker pull swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm”。 + +## 部署 +### 1. 
启动镜像 +```bash +IMAGE=swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm +NAME=omniinfer-v0.7.0 # Custom docker name +NPU_NUM=16 # A3节点die数 +DEVICE_ARGS=$(for i in $(seq 0 $((NPU_NUM-1))); do echo -n "--device /dev/davinci${i} "; done) + +# Run the container using the defined variables +# Note if you are running bridge network with docker, Please expose available ports for multiple nodes communication in advance +# To prevent device interference from other docker containers, add the argument "--privileged" +docker run -itd \ + --name=${NAME} \ + --network host \ + --privileged \ + --ipc=host \ + $DEVICE_ARGS \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \ + -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \ + -v /etc/ascend_install.info:/etc/ascend_install.info \ + -v /mnt/:/mnt/ \ + -v /data:/data \ + -v /home/work:/home/work \ + --entrypoint /bin/bash \ + swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm +``` +需要保证模型权重和本项目代码可在容器中访问。进入容器: +```bash +docker exec -it $NAME /bin/bash +``` + +### 2. 将examples/start_serving_openpangu_r_72b_2512.sh脚本放入omniinfer/tools/scripts路径并执行 + +```bash +git clone -b release_v0.7.0 https://gitee.com/omniai/omniinfer.git +cd omniinfer/tools/scripts +# 需修改serving脚本中model-path模型路径、master-ip机器IP地址和PYTHONPATH。 +bash start_serving_openpangu_r_72b_2512.sh +``` + +### 3. 发请求测试 + +服务启动后,可发送测试请求。 + +```bash +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openpangu_r_72b_2512", + "messages": [ + { + "role": "user", + "content": "Who are you?" 
+ } + ], + "temperature": 1.0, + "top_p": 0.8, + "top_k": -1, + "vllm_xargs": {"top_n_sigma": 0.05}, + "chat_template_kwargs": {"think": true, "reasoning_effort": "low"} + }' + ``` + ```bash + # 工具使用 +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openpangu_r_72b_2512", + "messages": [ + {"role": "system", "content": "你是华为公司开发的盘古模型。\n现在是2025年7月30日"}, + {"role": "user", "content": "深圳明天的天气如何?"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "获取指定城市的当前天气信息,包括温度、湿度、风速等数据。", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "城市名称,例如:北京、深圳。支持中文或拼音输入。" + }, + "date": { + "type": "string", + "description": "查询日期,格式为 YYYY-MM-DD(遵循 ISO 8601 标准)。例如:2023-10-01。" + } + }, + "required": ["location", "date"], + "additionalProperties": "false" + } + } + } + ], + "temperature": 1.0, + "top_p": 0.8, + "top_k": -1, + "vllm_xargs": {"top_n_sigma": 0.05}, + "chat_template_kwargs": {"think": true, "reasoning_effort": "high"} + }' +``` +模型默认是慢思考模式,在慢思考模式下,模型支持思维链分档,可通过请求体字段"chat_template_kwargs": {"think": true, "reasoning_effort": "high"}中"reasoning_effort": "high"和"low"平衡模型精度和效率。 +模型的慢思考模式,可通过请求体字段"chat_template_kwargs": {"think": true/false} 开启和关闭。 diff --git a/doc/omniinfer_for_openpangu_r_72b_2512_EN.md b/doc/omniinfer_for_openpangu_r_72b_2512_EN.md new file mode 100644 index 0000000000000000000000000000000000000000..5473e53fbb1afc607a5ef1e2f57904ea2a3c69ab --- /dev/null +++ b/doc/omniinfer_for_openpangu_r_72b_2512_EN.md @@ -0,0 +1,119 @@ +# Deployment Guide for openPangu-R-72B-2512 on Omni-Infer + +## Hardware Environment and Deployment Method +PD hybrid deployment, requiring only 4 dies of one Atlas 800T A3 machine. + +## Codes and Image +- Omni-Infer code version: release_v0.7.0 +- Docker Image: Refer to the v0.7.0 image in https://gitee.com/omniai/omniinfer/releases. 
For example, for A3 hardware and ARM architecture, use "docker pull swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm". + +## Deployment +### 1. Launch the image +```bash +IMAGE=swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm +NAME=omniinfer-v0.7.0 # Custom docker name +NPU_NUM=16 # 16 dies of A3 node +DEVICE_ARGS=$(for i in $(seq 0 $((NPU_NUM-1))); do echo -n "--device /dev/davinci${i} "; done) + +# Run the container using the defined variables +# Note if you are running bridge network with docker, Please expose available ports for multiple nodes communication in advance +# To prevent device interference from other docker containers, add the argument "--privileged" +docker run -itd \ + --name=${NAME} \ + --network host \ + --privileged \ + --ipc=host \ + $DEVICE_ARGS \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \ + -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \ + -v /etc/ascend_install.info:/etc/ascend_install.info \ + -v /mnt/:/mnt/ \ + -v /data:/data \ + -v /home/work:/home/work \ + --entrypoint /bin/bash \ + swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm +``` +Ensure that the model checkpoint and the project code are accessible within the container. Enter the container: +```bash +docker exec -it $NAME /bin/bash +``` + +### 2. Put examples/start_serving_openpangu_r_72b_2512.sh in the omniinfer/tools/scripts path and start the serving script + +```bash +git clone -b release_v0.7.0 https://gitee.com/omniai/omniinfer.git +cd omniinfer/tools/scripts +# You need to modify the model-path, master-ip address and PYTHONPATH in the serving script. +bash start_serving_openpangu_r_72b_2512.sh +``` + +### 3. Send Testing Requests + +After the service is started, we can send testing requests. 
+ +```bash +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openpangu_r_72b_2512", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ], + "temperature": 1.0, + "top_p": 0.8, + "top_k": -1, + "vllm_xargs": {"top_n_sigma": 0.05}, + "chat_template_kwargs": {"think": true, "reasoning_effort": "low"} + }' + ``` + ```bash +# Tool use +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openpangu_r_72b_2512", + "messages": [ + {"role": "system", "content": "你是华为公司开发的盘古模型。\n现在是2025年7月30日"}, + {"role": "user", "content": "深圳明天的天气如何?"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "获取指定城市的当前天气信息,包括温度、湿度、风速等数据。", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "城市名称,例如:北京、深圳。支持中文或拼音输入。" + }, + "date": { + "type": "string", + "description": "查询日期,格式为 YYYY-MM-DD(遵循 ISO 8601 标准)。例如:2023-10-01。" + } + }, + "required": ["location", "date"], + "additionalProperties": "false" + } + } + } + ], + "temperature": 1.0, + "top_p": 0.8, + "top_k": -1, + "vllm_xargs": {"top_n_sigma": 0.05}, + "chat_template_kwargs": {"think": true, "reasoning_effort": "high"} + }' +``` + +The model is in slow-thinking mode by default. In slow-thinking mode, you can specify different reasoning effort by setting the "reasoning_effort" parameter in "chat_template_kwargs" to "high" or "low" to balance model accuracy and efficiency. +openPangu-R-72B-2512 supports switching between slow-thinking and fast-thinking mode by setting {"think": true/false} in "chat_template_kwargs". 
\ No newline at end of file diff --git a/examples/start_serving_openpangu_r_72b_2512.sh b/examples/start_serving_openpangu_r_72b_2512.sh new file mode 100644 index 0000000000000000000000000000000000000000..d7a3d9ad867b4fcd2464cbb947b2290849f991e2 --- /dev/null +++ b/examples/start_serving_openpangu_r_72b_2512.sh @@ -0,0 +1,57 @@ +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. +# Starts one API server (--num-servers 1) for the openpangu_r_72b_2512 model through start_api_servers.py. +export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 +export VLLM_USE_V1=1 +export VLLM_WORKER_MULTIPROC_METHOD=fork +export VLLM_ENABLE_MC2=0 +export USING_LCCL_COM=0 + +export OMNI_USE_PANGU=1 +export ENABLE_PREFILL_TND=1 + +export HCCL_OP_EXPANSION_MODE="AIV" +export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 +export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True + +export HCCL_RDMA_TIMEOUT=5 +export HCCL_DETERMINISTIC=False +export ASCEND_GLOBAL_LOG_LEVEL=3 +export CPU_AFFINITY_CONF=2 +export VLLM_LOGGING_LEVEL=INFO + +export HCCL_BUFFSIZE=1000 +export HCCL_CONNECT_TIMEOUT=1800 +export HCCL_EXEC_TIMEOUT=1800 +export HCCL_INTRA_ROCE_ENABLE=1 +export HCCL_INTRA_PCIE_ENABLE=0 +export FORCE_ENABLE_CHUNK_PREFILL=1 +# USE_REASONING and USE_TOOL default to 1 when unset or empty; each one set to 1 adds its parser flags to the launch command. +export USE_REASONING=${USE_REASONING:=1} +export USE_TOOL=${USE_TOOL:=1} +if [ "$USE_REASONING" = "1" ]; then + reasoning="--reasoning-parser pangu" +fi +if [ "$USE_TOOL" = "1" ]; then + tools="--enable-auto-tool-choice --tool-call-parser pangu" +fi +# /path/to/omniinfer/ and /path/to/model/ below are placeholders; replace them before running. +export PYTHONPATH=/path/to/omniinfer/:$PYTHONPATH +rm -r -f .torchair_cache/ +# The trailing '&' after the final argument runs the launcher in the background. +python start_api_servers.py \ + --num-servers 1 \ + --model-path /path/to/model/ \ + --master-ip 0.0.0.0 \ + --tp 4 \ + --num-dp 1 \ + --master-port 3512 \ + --served-model-name openpangu_r_72b_2512 \ + --log-dir apiserverlog_pangu72B_hybrid_chunk \ + --extra-args "--max-num-batched-tokens 2048 --enforce-eager --no-enable-prefix-caching --enable-expert-parallel --max-num-seqs 32 --long-prefill-token-threshold 1024" \ + --base-api-port 8000 \ + --gpu-util 0.90 \ + --no-enable-prefix-caching \ + --max-model-len 131072 \ + $reasoning \ + $tools \ +
--additional-config '{"graph_model_compile_config":{"level":1, "use_ge_graph_cached":true, "decode_gear_list": [32]}, "enable_hybrid_graph_mode": false, "expert_parallel_size": 4, "expert_tensor_parallel_size": 1}' & diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91ad7e0656e52ba69e09a627fc553d1ed2e5d52f --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 45892, + "do_sample": true, + "temperature": 1.0, + "top_p": 0.8, + "top_n_sigma": 0.05, + "top_k": -1, + "transformers_version": "4.48.2" +} diff --git a/model-00001-of-000051.safetensors b/model-00001-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29ad59b8d4531754e80d14979ed7498baba4a46f --- /dev/null +++ b/model-00001-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faea9c15e58366768bd204ef27f631f1b91e258b1fdfbebb1eea3f481600dd06 +size 1899599744 diff --git a/model-00002-of-000051.safetensors b/model-00002-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00b42dc2d57a77dc1d5757a91754f073aa4d8538 --- /dev/null +++ b/model-00002-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f92258b47acb3e71c7072e741c117617d49f5d4c4ffcce559d971b9638c478f +size 484022000 diff --git a/model-00003-of-000051.safetensors b/model-00003-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e842b04d4ea09e947a99ae3f694f43ac9db8fd99 --- /dev/null +++ b/model-00003-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a77e9c991a6f78b03edada807dff37ba8d6083c7ff4d3afd77d83dad3cd77f +size 484022000 diff --git a/model-00004-of-000051.safetensors b/model-00004-of-000051.safetensors new file mode 100644 index
0000000000000000000000000000000000000000..3c5bc551dd8688ec8331ab3a08dee34b59e9ec1d --- /dev/null +++ b/model-00004-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0195156ab96017be8d878d9aec1c55c2ed522180fce949ddf30dc7f028b127b +size 484022000 diff --git a/model-00005-of-000051.safetensors b/model-00005-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bec98c0bbd684d4b08f718ab72c0e90fa5b17e5e --- /dev/null +++ b/model-00005-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1ae6f3c3bab2c5913505284600cddafba09a887a61496e1bb7c24b56f77b66 +size 3103607896 diff --git a/model-00006-of-000051.safetensors b/model-00006-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eecabf36b7af1a7596d4a1f302ef93b6ad0a1cf8 --- /dev/null +++ b/model-00006-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7d09f5bd129ec8eac37d63644c3fdd52b89c6de128712865078b3f6da3a5e1 +size 3103607896 diff --git a/model-00007-of-000051.safetensors b/model-00007-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75c6db46c6a185df7e72b0e5f9514db2413fbae4 --- /dev/null +++ b/model-00007-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3346f29473783c660dd4c4af0e7d955cbef3c474e64a751a04bed38665e6dc +size 3103607896 diff --git a/model-00008-of-000051.safetensors b/model-00008-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cdb482524adff5cb616b3a21950c1c55ba54b99 --- /dev/null +++ b/model-00008-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fd330d2faa49e6bca4eea2d8c78489ce6e298e55b9b9fbaff9e84eb1b7c2ce +size 3103607896 diff --git a/model-00009-of-000051.safetensors b/model-00009-of-000051.safetensors new file mode 100644 
index 0000000000000000000000000000000000000000..b2d9ca15e49dc3a47e4d4a149f3eb7cbadf03feb --- /dev/null +++ b/model-00009-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c59e8651cd10ebe8debe65fc620e1ab68efab24018a8cb7e2e0d2520b929a98 +size 3103607896 diff --git a/model-00010-of-000051.safetensors b/model-00010-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cae939e1abbf8b37c29e8e1890db478cc0fcd343 --- /dev/null +++ b/model-00010-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348a826c4153084ed0195a2989083772ba28009cb8d2479a501bd9e3e5e26686 +size 3103607896 diff --git a/model-00011-of-000051.safetensors b/model-00011-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecf0814736982645bd927effa746a9363067f17b --- /dev/null +++ b/model-00011-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f60733b0617fdc21159ec975182a44a727ab292dd9d58d9ac7b400075b87a9 +size 3103608152 diff --git a/model-00012-of-000051.safetensors b/model-00012-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c574842e52e013da0473bf58ebcedc9231f4eb4 --- /dev/null +++ b/model-00012-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5536de41c6b14feb5df47b3be85e70a89337cf2480933eb947e1a1dc3d447fc7 +size 3103608152 diff --git a/model-00013-of-000051.safetensors b/model-00013-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3414f9cc8567f3e704568dcef5d988b6372eeded --- /dev/null +++ b/model-00013-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02a6449935c211e5a4347c73b22763f127e78f74aa736fa5b1abbeb0aaa1c87 +size 3103608152 diff --git a/model-00014-of-000051.safetensors b/model-00014-of-000051.safetensors new file mode 
100644 index 0000000000000000000000000000000000000000..0bd04de09a9505ef2a626c79d1ee20d3e1d9c844 --- /dev/null +++ b/model-00014-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f32dd5f9f4414ea611ee0bf5ed343b7604785194a8c50e4c8f57cbec41f8d68 +size 3103608152 diff --git a/model-00015-of-000051.safetensors b/model-00015-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e3df622ffaab5c1a6498b335511a817cb2b5bbf --- /dev/null +++ b/model-00015-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b7666e445966de147f4860a397f93f4b5c7635cbd469ac3c442af7ba939962 +size 3103608152 diff --git a/model-00016-of-000051.safetensors b/model-00016-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab2173892da9de46f39dd3db7e51added214dcf1 --- /dev/null +++ b/model-00016-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecb4ad21311887f4c44eeb94b0cfd502821db1dc1d12611b5b652a8a84d4fbe +size 3103608152 diff --git a/model-00017-of-000051.safetensors b/model-00017-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94f62eac478127a42a62afcea9f41301b0d727ab --- /dev/null +++ b/model-00017-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6908a54fdb8640a406a78c6fb68c035f9cd1982bf4a9853e102941a4f50a2b +size 3103608152 diff --git a/model-00018-of-000051.safetensors b/model-00018-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4eccbec4c8ad554fc1917662127cc9af128edcf0 --- /dev/null +++ b/model-00018-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10188bcb0c9ce4bd8aa8b4084ccd2bb4bb830f95507f5032fc6ac22168d676be +size 3103608152 diff --git a/model-00019-of-000051.safetensors b/model-00019-of-000051.safetensors new file 
mode 100644 index 0000000000000000000000000000000000000000..563c7788258c5528d0a38a608e6eda1cce761cfc --- /dev/null +++ b/model-00019-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30d5babd023187eef88826a3b505a06e586fc458b7a2c5da481473f6210e889 +size 3103608152 diff --git a/model-00020-of-000051.safetensors b/model-00020-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..473c0315cce86135da1605773c20a79438be8934 --- /dev/null +++ b/model-00020-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37bf95087b8495dd8065020ccc550d73bfbc99b12419c54dc902aeba8db9d1e6 +size 3103608152 diff --git a/model-00021-of-000051.safetensors b/model-00021-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37117d8a2ac5f5db5ae692f6afd06df44325e129 --- /dev/null +++ b/model-00021-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8741ca6de1c6f1c5c2788023d9a8db0c82b503ec9fe49ba076f07d90c5de92ba +size 3103608152 diff --git a/model-00022-of-000051.safetensors b/model-00022-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..478025654de069000b89cb4300ba2661507f0af4 --- /dev/null +++ b/model-00022-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a2c41a55ce55e98be091bc4c2333e4a7e69dee5d79fc222fd27ab30e3ce5cb +size 3103608152 diff --git a/model-00023-of-000051.safetensors b/model-00023-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f6c134344dba27ea4571914148d2cf0202f5f8a --- /dev/null +++ b/model-00023-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6b91c53ce720c03347dad507730b16ecb9eb9d2bd6c468a91150cd9c23cd30 +size 3103608152 diff --git a/model-00024-of-000051.safetensors b/model-00024-of-000051.safetensors new 
file mode 100644 index 0000000000000000000000000000000000000000..d867d3201b0d7face523ea71af76ab748e80bed8 --- /dev/null +++ b/model-00024-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d441eaad36da0df2dc9825c6909b7b29d907cfd00089af1a007c45632e5e115 +size 3103608152 diff --git a/model-00025-of-000051.safetensors b/model-00025-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61e08abbf090a851c1a0247915b6edf8ae04264f --- /dev/null +++ b/model-00025-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8f54dbb2414b22a883c110920b7bd84771a2e2dbb78a85b759c7ece195bc6c +size 3103608152 diff --git a/model-00026-of-000051.safetensors b/model-00026-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab38456084e440f5cc609360fb7343992b2ae951 --- /dev/null +++ b/model-00026-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf8a05cb3853131aaff5b1a2357cdd4462cde2d9e57ad35a5779c0f90df361e +size 3103608152 diff --git a/model-00027-of-000051.safetensors b/model-00027-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79d4e83fa252eec7034bc43c149883f29b5ac1de --- /dev/null +++ b/model-00027-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad286b9f5200c6de8a97c0eecccafafbbea7a710716abd0bad30dfc768b0650c +size 3103608152 diff --git a/model-00028-of-000051.safetensors b/model-00028-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..369f1c6cd73f39418d2d40b7a1fede3e52a12d29 --- /dev/null +++ b/model-00028-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291d66cb2467c36e2642250c40445fdca3cadbfbbf784b66fbbd6356aa3a1a18 +size 3103608152 diff --git a/model-00029-of-000051.safetensors 
b/model-00029-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5392738accac9caf3dfaa2cc8faf691c7925c4b3 --- /dev/null +++ b/model-00029-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01876779fe5f926a1f4ad070e5cfbdb061dcd424272c35d3e5048254355fa13 +size 3103608152 diff --git a/model-00030-of-000051.safetensors b/model-00030-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c771947412f5876fd823148471a029f1d1ca4203 --- /dev/null +++ b/model-00030-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150c6e3f25cc165d23a8af45ee8859324267fb85825093472421849a2c994bcb +size 3103608152 diff --git a/model-00031-of-000051.safetensors b/model-00031-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1812e020e776ee06db754ae221a735c1538d1706 --- /dev/null +++ b/model-00031-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83613272553ca7d84303dc88527ee273ba90244e6a8bda34df0b4ba2be337ed +size 3103608152 diff --git a/model-00032-of-000051.safetensors b/model-00032-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6333e8e775300750cb9455ce6c02337bfc70cbd --- /dev/null +++ b/model-00032-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb600e04ab0220ad2cc50e7bb8025db75c4be9fc9478f356e393d11231ac5cb +size 3103608152 diff --git a/model-00033-of-000051.safetensors b/model-00033-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..088fc0bf9b2d64d523b53690c59b29e80848f858 --- /dev/null +++ b/model-00033-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b15a5bb9a59506a6ebb7224135be2c2f4816d0cbd16c096404d44dfed953712 +size 3103608152 diff --git 
a/model-00034-of-000051.safetensors b/model-00034-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b38faffee6276ef5a468a5eae33a31675dc47a2 --- /dev/null +++ b/model-00034-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c87358d675516d50057fd7baf809508b14a69751805468ef7edd21f9b3b270b +size 3103608152 diff --git a/model-00035-of-000051.safetensors b/model-00035-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5893bbcd2062744542606cc6fe4ebb40872ab93c --- /dev/null +++ b/model-00035-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751356ba1cb106cb53f3416dcf1fad91cf799af400b9a4afa30c7a5b227b8839 +size 3103608152 diff --git a/model-00036-of-000051.safetensors b/model-00036-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffeacdc2db2cee0d1dd57008626cd70a8ead9653 --- /dev/null +++ b/model-00036-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b580d5e2bc27651b57b1b310bbd8c053f4da6c044f756fb5db78368b1fcfa1 +size 3103608152 diff --git a/model-00037-of-000051.safetensors b/model-00037-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32f58a7994eb84840e4742fe51d43cd43a851355 --- /dev/null +++ b/model-00037-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e133139c8e3a80a675625abb4aebed19621c3aa7e5db072850441f2bb9a1c9 +size 3103608152 diff --git a/model-00038-of-000051.safetensors b/model-00038-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..defda3b0c908b2c80d5f6e3a159944f34bd3acd6 --- /dev/null +++ b/model-00038-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206234a10d3ea746595667909a6e6e0dc728d1de719f5bd126b1e54785603bdc +size 3103608152 diff 
--git a/model-00039-of-000051.safetensors b/model-00039-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a925dba29f5da0f14ba298f355f26188411ed842 --- /dev/null +++ b/model-00039-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add0f4a5dd9bc63f900f604587c8b00f1f089dc5a8a6b39d751abf98ba11741d +size 3103608152 diff --git a/model-00040-of-000051.safetensors b/model-00040-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e9af52aa0b0bdcd8a2a145f7f0fffb61a1d1ccf --- /dev/null +++ b/model-00040-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9bcf8b3349ccf97a88894e753ffb0232a41bfb78861e44fbc8037657cf4f3d3 +size 3103608152 diff --git a/model-00041-of-000051.safetensors b/model-00041-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7b3bd0512e46b62253632d3dcb37f366e8247a9 --- /dev/null +++ b/model-00041-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a0b83bd660237b58b923942182c3d130f18d7f63b0d6a0ff70eea03555409d +size 3103608152 diff --git a/model-00042-of-000051.safetensors b/model-00042-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..034e50c03af432e7c130f6cddf69fc193a83c080 --- /dev/null +++ b/model-00042-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce999af670ed92a1b0826ec17f05d1e787c57c5bdfacaddd0386f58f7b1a58d2 +size 3103608152 diff --git a/model-00043-of-000051.safetensors b/model-00043-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bccdc61664f909247b033f55e7c03fda9875bc98 --- /dev/null +++ b/model-00043-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d814324c61ee46c59f03425a22ba85831943b23f3ade9d5efe7dec0c8fc3851d +size 3103608152 
diff --git a/model-00044-of-000051.safetensors b/model-00044-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..198131b259f0d92efaded232ac3b16e816aea40f --- /dev/null +++ b/model-00044-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68128a3631b0c3c05580350b2b8fc6aacfc11adbf7e25a06df624071619aa026 +size 3103608152 diff --git a/model-00045-of-000051.safetensors b/model-00045-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..042486a0d2eaca8c87b11bcf9be3932763bbad3e --- /dev/null +++ b/model-00045-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1cda178dca892015d852b7f2cb2677f1980ea14d923af2352f4c2f8a0eb6574 +size 3103608152 diff --git a/model-00046-of-000051.safetensors b/model-00046-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9809bfb69126d38baab49bc60131e6f3727081d --- /dev/null +++ b/model-00046-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25c7a05912871fa3874e270d8ca5c7259610e4b4f6f0b3e57306603b0ac1985 +size 3103608152 diff --git a/model-00047-of-000051.safetensors b/model-00047-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5923631b57423bd4c99a72b9abef18cef4b95d0f --- /dev/null +++ b/model-00047-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb75e10be2ff98a6caf93af1caee89281144e5b955414d1ef68ff9ac5fd428a +size 3103608152 diff --git a/model-00048-of-000051.safetensors b/model-00048-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9515558fdf1129cbd084fa48ad6498bc3c5a4afb --- /dev/null +++ b/model-00048-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:750aa728a9a361c34841d55f6042c2da427183e8f320c6370939c0347bcf14b9 +size 
3103608152 diff --git a/model-00049-of-000051.safetensors b/model-00049-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56c558d4d895b160496a4291d6072531858c5f26 --- /dev/null +++ b/model-00049-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b10aff5a78536a3fdf10f00380eaa4dcab583376b62456e2615c049f272c85 +size 3103608152 diff --git a/model-00050-of-000051.safetensors b/model-00050-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e6b562bb5429064136470578ed8a2ee773295f8 --- /dev/null +++ b/model-00050-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7e4217b571f525f50e0a27b699217619fcfc3a433962d8ef9255c4d086d6ce +size 4519195360 diff --git a/model-00051-of-000051.safetensors b/model-00051-of-000051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ad768975cf93d6fdd4d223ca02fcf826e8a982e --- /dev/null +++ b/model-00051-of-000051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c414a63072a48e501b7b776959ea55a7532d75e1c7b00566282627ca699b13b +size 6019726520 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..d77f32df01a098452c9befd2f4aa215f84734e82 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,12002 @@ +{ + "metadata": { + "total_size": 153551464928 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-000051.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-000051.safetensors", + "model.layers.0.pre_mlp_layernorm.weight": "model-00001-of-000051.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-000051.safetensors", + "model.layers.0.post_mlp_layernorm.weight": "model-00001-of-000051.safetensors", + "model.layers.0.self_attn.qkv_proj.weight": 
"model-00001-of-000051.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-000051.safetensors", + "model.layers.0.self_attn.k_layernorm.weight": "model-00001-of-000051.safetensors", + "model.layers.0.self_attn.param_sink_key": "model-00001-of-000051.safetensors", + "model.layers.0.self_attn.param_sink_value": "model-00001-of-000051.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-000051.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-000051.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-000051.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-000051.safetensors", + "model.layers.1.pre_mlp_layernorm.weight": "model-00002-of-000051.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-000051.safetensors", + "model.layers.1.post_mlp_layernorm.weight": "model-00002-of-000051.safetensors", + "model.layers.1.self_attn.qkv_proj.weight": "model-00002-of-000051.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-000051.safetensors", + "model.layers.1.self_attn.k_layernorm.weight": "model-00002-of-000051.safetensors", + "model.layers.1.self_attn.param_sink_key": "model-00002-of-000051.safetensors", + "model.layers.1.self_attn.param_sink_value": "model-00002-of-000051.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-000051.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-000051.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-000051.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-000051.safetensors", + "model.layers.2.pre_mlp_layernorm.weight": "model-00003-of-000051.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-000051.safetensors", + "model.layers.2.post_mlp_layernorm.weight": "model-00003-of-000051.safetensors", + "model.layers.2.self_attn.qkv_proj.weight": 
"model-00003-of-000051.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-000051.safetensors", + "model.layers.2.self_attn.k_layernorm.weight": "model-00003-of-000051.safetensors", + "model.layers.2.self_attn.param_sink_key": "model-00003-of-000051.safetensors", + "model.layers.2.self_attn.param_sink_value": "model-00003-of-000051.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-000051.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-000051.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00003-of-000051.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-000051.safetensors", + "model.layers.3.pre_mlp_layernorm.weight": "model-00004-of-000051.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-000051.safetensors", + "model.layers.3.post_mlp_layernorm.weight": "model-00004-of-000051.safetensors", + "model.layers.3.self_attn.qkv_proj.weight": "model-00004-of-000051.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-000051.safetensors", + "model.layers.3.self_attn.k_layernorm.weight": "model-00004-of-000051.safetensors", + "model.layers.3.self_attn.param_sink_key": "model-00004-of-000051.safetensors", + "model.layers.3.self_attn.param_sink_value": "model-00004-of-000051.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-000051.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-000051.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-000051.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-000051.safetensors", + "model.layers.4.pre_mlp_layernorm.weight": "model-00005-of-000051.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-000051.safetensors", + "model.layers.4.post_mlp_layernorm.weight": "model-00005-of-000051.safetensors", + "model.layers.4.self_attn.qkv_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.self_attn.k_layernorm.weight": "model-00005-of-000051.safetensors", + "model.layers.4.self_attn.param_sink_key": "model-00005-of-000051.safetensors", + "model.layers.4.self_attn.param_sink_value": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.gate.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.e_score_correction_bias": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00005-of-000051.safetensors", + 
"model.layers.4.mlp.experts.12.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00005-of-000051.safetensors", + 
"model.layers.4.mlp.experts.27.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00005-of-000051.safetensors", + 
"model.layers.4.mlp.experts.42.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00005-of-000051.safetensors", + 
"model.layers.4.mlp.experts.57.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.64.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.64.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.64.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.65.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.65.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.65.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.66.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.66.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.66.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.67.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.67.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.67.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.68.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.68.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.68.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.69.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.69.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.69.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.70.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.70.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.70.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.71.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.71.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.71.down_proj.weight": "model-00005-of-000051.safetensors", + 
"model.layers.4.mlp.experts.72.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.72.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.72.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.73.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.73.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.73.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.74.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.74.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.74.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.75.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.75.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.75.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.76.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.76.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.76.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.77.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.77.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.77.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.78.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.78.up_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.78.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.79.gate_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.79.up_proj.weight": 
"model-00005-of-000051.safetensors", + "model.layers.4.mlp.experts.79.down_proj.weight": "model-00005-of-000051.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-000051.safetensors", + "model.layers.5.pre_mlp_layernorm.weight": "model-00006-of-000051.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-000051.safetensors", + "model.layers.5.post_mlp_layernorm.weight": "model-00006-of-000051.safetensors", + "model.layers.5.self_attn.qkv_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.self_attn.k_layernorm.weight": "model-00006-of-000051.safetensors", + "model.layers.5.self_attn.param_sink_key": "model-00006-of-000051.safetensors", + "model.layers.5.self_attn.param_sink_value": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.gate.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.e_score_correction_bias": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00006-of-000051.safetensors", + 
"model.layers.5.mlp.experts.10.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00006-of-000051.safetensors", + 
"model.layers.5.mlp.experts.25.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00006-of-000051.safetensors", + 
"model.layers.5.mlp.experts.40.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00006-of-000051.safetensors", + 
"model.layers.5.mlp.experts.55.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.64.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.64.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.64.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.65.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.65.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.65.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.66.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.66.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.66.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.67.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.67.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.67.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.68.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.68.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.68.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.69.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.69.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.69.down_proj.weight": "model-00006-of-000051.safetensors", + 
"model.layers.5.mlp.experts.70.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.70.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.70.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.71.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.71.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.71.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.72.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.72.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.72.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.73.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.73.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.73.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.74.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.74.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.74.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.75.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.75.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.75.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.76.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.76.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.76.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.77.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.77.up_proj.weight": 
"model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.77.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.78.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.78.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.78.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.79.gate_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.79.up_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.5.mlp.experts.79.down_proj.weight": "model-00006-of-000051.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-000051.safetensors", + "model.layers.6.pre_mlp_layernorm.weight": "model-00007-of-000051.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-000051.safetensors", + "model.layers.6.post_mlp_layernorm.weight": "model-00007-of-000051.safetensors", + "model.layers.6.self_attn.qkv_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.self_attn.k_layernorm.weight": "model-00007-of-000051.safetensors", + "model.layers.6.self_attn.param_sink_key": "model-00007-of-000051.safetensors", + "model.layers.6.self_attn.param_sink_value": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.gate.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.e_score_correction_bias": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00007-of-000051.safetensors", + 
"model.layers.6.mlp.experts.8.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00007-of-000051.safetensors", + 
"model.layers.6.mlp.experts.23.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00007-of-000051.safetensors", + 
"model.layers.6.mlp.experts.38.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00007-of-000051.safetensors", + 
"model.layers.6.mlp.experts.53.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.64.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.64.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.64.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.65.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.65.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.65.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.66.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.66.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.66.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.67.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.67.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.67.down_proj.weight": "model-00007-of-000051.safetensors", + 
"model.layers.6.mlp.experts.68.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.68.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.68.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.69.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.69.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.69.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.70.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.70.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.70.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.71.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.71.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.71.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.72.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.72.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.72.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.73.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.73.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.73.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.74.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.74.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.74.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.75.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.75.up_proj.weight": 
"model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.75.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.76.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.76.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.76.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.77.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.77.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.77.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.78.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.78.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.78.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.79.gate_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.79.up_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.6.mlp.experts.79.down_proj.weight": "model-00007-of-000051.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-000051.safetensors", + "model.layers.7.pre_mlp_layernorm.weight": "model-00008-of-000051.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-000051.safetensors", + "model.layers.7.post_mlp_layernorm.weight": "model-00008-of-000051.safetensors", + "model.layers.7.self_attn.qkv_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.self_attn.k_layernorm.weight": "model-00008-of-000051.safetensors", + "model.layers.7.self_attn.param_sink_key": "model-00008-of-000051.safetensors", + "model.layers.7.self_attn.param_sink_value": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.gate.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.e_score_correction_bias": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00008-of-000051.safetensors", + 
"model.layers.7.mlp.experts.6.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00008-of-000051.safetensors", + 
"model.layers.7.mlp.experts.21.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00008-of-000051.safetensors", + 
"model.layers.7.mlp.experts.36.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00008-of-000051.safetensors", + 
"model.layers.7.mlp.experts.51.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.64.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.64.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.65.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.65.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.65.down_proj.weight": "model-00008-of-000051.safetensors", + 
"model.layers.7.mlp.experts.66.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.66.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.66.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.67.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.67.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.67.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.68.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.68.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.68.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.69.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.69.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.69.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.70.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.70.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.70.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.71.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.71.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.71.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.72.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.72.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.72.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.73.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.73.up_proj.weight": 
"model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.73.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.74.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.74.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.74.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.75.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.75.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.75.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.76.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.76.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.76.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.77.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.77.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.77.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.78.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.78.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.78.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.79.gate_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.79.up_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.7.mlp.experts.79.down_proj.weight": "model-00008-of-000051.safetensors", + "model.layers.8.input_layernorm.weight": "model-00009-of-000051.safetensors", + "model.layers.8.pre_mlp_layernorm.weight": "model-00009-of-000051.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.post_mlp_layernorm.weight": "model-00009-of-000051.safetensors", + "model.layers.8.self_attn.qkv_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.self_attn.k_layernorm.weight": "model-00009-of-000051.safetensors", + "model.layers.8.self_attn.param_sink_key": "model-00009-of-000051.safetensors", + "model.layers.8.self_attn.param_sink_value": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.gate.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.e_score_correction_bias": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.4.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": 
"model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.19.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": 
"model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.34.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": 
"model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.49.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": 
"model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.64.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.64.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.64.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.65.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.65.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.65.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.66.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.66.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.66.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.67.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.67.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.67.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.68.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.68.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.68.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.69.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.69.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.69.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.70.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.70.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.70.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.71.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.71.up_proj.weight": 
"model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.71.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.72.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.72.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.72.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.73.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.73.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.73.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.74.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.74.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.74.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.75.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.75.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.75.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.76.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.76.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.76.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.77.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.77.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.77.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.78.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.78.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.78.down_proj.weight": "model-00009-of-000051.safetensors", + 
"model.layers.8.mlp.experts.79.gate_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.79.up_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.8.mlp.experts.79.down_proj.weight": "model-00009-of-000051.safetensors", + "model.layers.9.input_layernorm.weight": "model-00010-of-000051.safetensors", + "model.layers.9.pre_mlp_layernorm.weight": "model-00010-of-000051.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00010-of-000051.safetensors", + "model.layers.9.post_mlp_layernorm.weight": "model-00010-of-000051.safetensors", + "model.layers.9.self_attn.qkv_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.self_attn.k_layernorm.weight": "model-00010-of-000051.safetensors", + "model.layers.9.self_attn.param_sink_key": "model-00010-of-000051.safetensors", + "model.layers.9.self_attn.param_sink_value": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.gate.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.e_score_correction_bias": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.2.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": 
"model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.17.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": 
"model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.32.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": 
"model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.47.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": 
"model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.62.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.64.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.64.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.64.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.65.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.65.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.65.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.66.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.66.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.66.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.67.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.67.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.67.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.68.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.68.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.68.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.69.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.69.up_proj.weight": 
"model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.69.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.70.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.70.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.70.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.71.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.71.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.71.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.72.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.72.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.72.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.73.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.73.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.73.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.74.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.74.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.74.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.75.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.75.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.75.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.76.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.76.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.76.down_proj.weight": "model-00010-of-000051.safetensors", + 
"model.layers.9.mlp.experts.77.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.77.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.77.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.78.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.78.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.78.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.79.gate_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.79.up_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.9.mlp.experts.79.down_proj.weight": "model-00010-of-000051.safetensors", + "model.layers.10.input_layernorm.weight": "model-00011-of-000051.safetensors", + "model.layers.10.pre_mlp_layernorm.weight": "model-00011-of-000051.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00011-of-000051.safetensors", + "model.layers.10.post_mlp_layernorm.weight": "model-00011-of-000051.safetensors", + "model.layers.10.self_attn.qkv_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.self_attn.k_layernorm.weight": "model-00011-of-000051.safetensors", + "model.layers.10.self_attn.param_sink_key": "model-00011-of-000051.safetensors", + "model.layers.10.self_attn.param_sink_value": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.gate.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.e_score_correction_bias": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.0.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": 
"model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.15.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.22.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.29.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.37.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.44.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.51.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.59.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.64.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.64.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.64.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.65.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.65.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.65.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.66.gate_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.66.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.66.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.67.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.67.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.67.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.68.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.68.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.68.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.69.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.69.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.69.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.70.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.70.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.70.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.71.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.71.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.71.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.72.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.72.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.72.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.73.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.73.up_proj.weight": "model-00011-of-000051.safetensors", + 
"model.layers.10.mlp.experts.73.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.74.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.74.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.74.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.75.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.75.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.75.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.76.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.76.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.76.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.77.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.77.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.77.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.78.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.78.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.78.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.79.gate_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.79.up_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.10.mlp.experts.79.down_proj.weight": "model-00011-of-000051.safetensors", + "model.layers.11.input_layernorm.weight": "model-00012-of-000051.safetensors", + "model.layers.11.pre_mlp_layernorm.weight": "model-00012-of-000051.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00012-of-000051.safetensors", + "model.layers.11.post_mlp_layernorm.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.self_attn.qkv_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.self_attn.k_layernorm.weight": "model-00012-of-000051.safetensors", + "model.layers.11.self_attn.param_sink_key": "model-00012-of-000051.safetensors", + "model.layers.11.self_attn.param_sink_value": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.gate.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.e_score_correction_bias": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00012-of-000051.safetensors", + 
"model.layers.11.mlp.experts.4.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.64.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.64.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.64.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.65.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.65.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.66.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.66.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.67.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.67.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.67.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.68.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.68.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.68.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.69.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.69.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.69.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.70.gate_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.70.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.70.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.71.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.71.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.71.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.72.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.72.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.72.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.73.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.73.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.73.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.74.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.74.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.74.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.75.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.75.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.75.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.76.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.76.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.76.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.77.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.77.up_proj.weight": 
"model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.77.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.78.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.78.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.78.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.79.gate_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.79.up_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.11.mlp.experts.79.down_proj.weight": "model-00012-of-000051.safetensors", + "model.layers.12.input_layernorm.weight": "model-00013-of-000051.safetensors", + "model.layers.12.pre_mlp_layernorm.weight": "model-00013-of-000051.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00013-of-000051.safetensors", + "model.layers.12.post_mlp_layernorm.weight": "model-00013-of-000051.safetensors", + "model.layers.12.self_attn.qkv_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.self_attn.k_layernorm.weight": "model-00013-of-000051.safetensors", + "model.layers.12.self_attn.param_sink_key": "model-00013-of-000051.safetensors", + "model.layers.12.self_attn.param_sink_value": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.gate.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.e_score_correction_bias": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.0.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.3.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": 
"model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.15.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.18.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.22.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.30.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.33.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.37.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.44.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.48.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.52.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.59.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.63.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.64.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.64.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.64.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.65.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.65.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.65.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.66.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.66.up_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.66.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.67.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.67.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.67.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.68.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.68.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.68.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.69.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.69.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.69.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.70.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.70.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.70.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.71.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.71.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.71.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.72.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.72.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.72.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.73.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.73.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.73.down_proj.weight": "model-00013-of-000051.safetensors", + 
"model.layers.12.mlp.experts.74.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.74.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.74.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.75.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.75.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.75.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.76.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.76.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.76.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.77.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.77.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.77.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.78.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.78.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.78.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.79.gate_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.79.up_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.12.mlp.experts.79.down_proj.weight": "model-00013-of-000051.safetensors", + "model.layers.13.input_layernorm.weight": "model-00014-of-000051.safetensors", + "model.layers.13.pre_mlp_layernorm.weight": "model-00014-of-000051.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00014-of-000051.safetensors", + "model.layers.13.post_mlp_layernorm.weight": "model-00014-of-000051.safetensors", + "model.layers.13.self_attn.qkv_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.self_attn.k_layernorm.weight": "model-00014-of-000051.safetensors", + "model.layers.13.self_attn.param_sink_key": "model-00014-of-000051.safetensors", + "model.layers.13.self_attn.param_sink_value": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.gate.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.e_score_correction_bias": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.0.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00014-of-000051.safetensors", + 
"model.layers.13.mlp.experts.4.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.15.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.30.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.45.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.60.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.64.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.64.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.64.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.65.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.65.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.65.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.66.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.66.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.66.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.67.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.67.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.67.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.68.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.68.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.68.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.69.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.69.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.69.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.70.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.70.up_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.70.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.71.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.71.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.71.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.72.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.72.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.72.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.73.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.73.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.73.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.74.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.74.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.74.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.75.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.75.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.75.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.76.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.76.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.76.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.77.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.77.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.77.down_proj.weight": 
"model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.78.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.78.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.78.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.79.gate_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.79.up_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.13.mlp.experts.79.down_proj.weight": "model-00014-of-000051.safetensors", + "model.layers.14.input_layernorm.weight": "model-00015-of-000051.safetensors", + "model.layers.14.pre_mlp_layernorm.weight": "model-00015-of-000051.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00015-of-000051.safetensors", + "model.layers.14.post_mlp_layernorm.weight": "model-00015-of-000051.safetensors", + "model.layers.14.self_attn.qkv_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.self_attn.k_layernorm.weight": "model-00015-of-000051.safetensors", + "model.layers.14.self_attn.param_sink_key": "model-00015-of-000051.safetensors", + "model.layers.14.self_attn.param_sink_value": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.gate.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.e_score_correction_bias": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": 
"model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.3.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.8.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.15.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.18.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.23.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.30.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.33.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.37.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.45.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.48.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.52.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.59.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.63.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.64.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.64.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.64.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.65.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.65.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.65.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.66.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.66.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.66.down_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.67.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.67.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.67.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.68.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.68.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.68.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.69.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.69.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.69.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.70.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.70.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.70.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.71.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.71.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.71.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.72.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.72.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.72.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.73.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.73.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.73.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.74.gate_proj.weight": "model-00015-of-000051.safetensors", + 
"model.layers.14.mlp.experts.74.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.74.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.75.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.75.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.75.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.76.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.76.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.76.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.77.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.77.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.77.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.78.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.78.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.78.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.79.gate_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.79.up_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.14.mlp.experts.79.down_proj.weight": "model-00015-of-000051.safetensors", + "model.layers.15.input_layernorm.weight": "model-00016-of-000051.safetensors", + "model.layers.15.pre_mlp_layernorm.weight": "model-00016-of-000051.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00016-of-000051.safetensors", + "model.layers.15.post_mlp_layernorm.weight": "model-00016-of-000051.safetensors", + "model.layers.15.self_attn.qkv_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.self_attn.o_proj.weight": 
"model-00016-of-000051.safetensors", + "model.layers.15.self_attn.k_layernorm.weight": "model-00016-of-000051.safetensors", + "model.layers.15.self_attn.param_sink_key": "model-00016-of-000051.safetensors", + "model.layers.15.self_attn.param_sink_value": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.gate.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.e_score_correction_bias": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.0.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.4.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.12.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.15.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.19.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.26.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.30.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.34.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.41.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.45.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.48.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.56.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.60.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.63.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.64.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.64.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.64.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.65.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.65.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.65.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.66.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.66.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.66.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.67.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.67.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.67.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.68.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.68.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.69.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.69.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.69.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.70.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.70.up_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.70.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.71.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.71.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.71.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.72.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.72.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.72.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.73.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.73.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.73.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.74.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.74.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.74.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.75.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.75.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.75.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.76.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.76.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.76.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.77.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.77.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.77.down_proj.weight": "model-00016-of-000051.safetensors", + 
"model.layers.15.mlp.experts.78.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.78.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.78.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.79.gate_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.79.up_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.15.mlp.experts.79.down_proj.weight": "model-00016-of-000051.safetensors", + "model.layers.16.input_layernorm.weight": "model-00017-of-000051.safetensors", + "model.layers.16.pre_mlp_layernorm.weight": "model-00017-of-000051.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00017-of-000051.safetensors", + "model.layers.16.post_mlp_layernorm.weight": "model-00017-of-000051.safetensors", + "model.layers.16.self_attn.qkv_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.self_attn.k_layernorm.weight": "model-00017-of-000051.safetensors", + "model.layers.16.self_attn.param_sink_key": "model-00017-of-000051.safetensors", + "model.layers.16.self_attn.param_sink_value": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.gate.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.e_score_correction_bias": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.1.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.3.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": 
"model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00017-of-000051.safetensors", 
+ "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.18.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.23.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.30.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.33.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.38.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.45.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.48.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.52.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.60.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.63.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.64.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.64.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.64.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.65.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.65.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.65.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.66.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.66.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.66.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.67.gate_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.67.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.67.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.68.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.68.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.68.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.69.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.69.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.69.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.70.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.70.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.70.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.71.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.71.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.71.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.72.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.72.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.72.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.73.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.73.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.73.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.74.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.74.up_proj.weight": "model-00017-of-000051.safetensors", + 
"model.layers.16.mlp.experts.74.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.75.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.75.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.75.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.76.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.76.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.76.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.77.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.77.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.77.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.78.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.78.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.78.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.79.gate_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.79.up_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.16.mlp.experts.79.down_proj.weight": "model-00017-of-000051.safetensors", + "model.layers.17.input_layernorm.weight": "model-00018-of-000051.safetensors", + "model.layers.17.pre_mlp_layernorm.weight": "model-00018-of-000051.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00018-of-000051.safetensors", + "model.layers.17.post_mlp_layernorm.weight": "model-00018-of-000051.safetensors", + "model.layers.17.self_attn.qkv_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.self_attn.k_layernorm.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.self_attn.param_sink_key": "model-00018-of-000051.safetensors", + "model.layers.17.self_attn.param_sink_value": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.gate.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.e_score_correction_bias": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.0.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00018-of-000051.safetensors", + 
"model.layers.17.mlp.experts.5.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.15.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.30.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.45.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.60.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.64.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.64.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.64.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.65.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.65.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.65.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.66.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.66.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.66.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.67.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.67.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.67.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.68.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.68.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.68.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.69.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.69.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.69.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.70.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.70.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.70.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.71.gate_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.71.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.71.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.72.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.72.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.72.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.73.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.73.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.73.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.74.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.74.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.74.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.75.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.75.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.75.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.76.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.76.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.76.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.77.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.77.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.77.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.78.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.78.up_proj.weight": 
"model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.78.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.79.gate_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.79.up_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.17.mlp.experts.79.down_proj.weight": "model-00018-of-000051.safetensors", + "model.layers.18.input_layernorm.weight": "model-00019-of-000051.safetensors", + "model.layers.18.pre_mlp_layernorm.weight": "model-00019-of-000051.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00019-of-000051.safetensors", + "model.layers.18.post_mlp_layernorm.weight": "model-00019-of-000051.safetensors", + "model.layers.18.self_attn.qkv_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.self_attn.k_layernorm.weight": "model-00019-of-000051.safetensors", + "model.layers.18.self_attn.param_sink_key": "model-00019-of-000051.safetensors", + "model.layers.18.self_attn.param_sink_value": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.gate.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.e_score_correction_bias": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": 
"model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.3.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.9.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.16.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.18.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.23.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.31.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.33.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.38.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.45.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.48.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.53.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.60.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.63.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.64.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.64.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.64.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.65.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.65.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.65.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.66.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.66.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.66.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.67.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.67.up_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.67.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.68.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.68.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.68.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.69.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.69.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.69.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.70.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.70.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.70.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.71.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.71.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.71.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.72.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.72.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.72.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.73.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.73.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.73.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.74.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.74.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.74.down_proj.weight": "model-00019-of-000051.safetensors", + 
"model.layers.18.mlp.experts.75.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.75.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.75.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.76.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.76.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.76.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.77.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.77.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.77.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.78.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.78.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.78.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.79.gate_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.79.up_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.18.mlp.experts.79.down_proj.weight": "model-00019-of-000051.safetensors", + "model.layers.19.input_layernorm.weight": "model-00020-of-000051.safetensors", + "model.layers.19.pre_mlp_layernorm.weight": "model-00020-of-000051.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00020-of-000051.safetensors", + "model.layers.19.post_mlp_layernorm.weight": "model-00020-of-000051.safetensors", + "model.layers.19.self_attn.qkv_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.self_attn.k_layernorm.weight": "model-00020-of-000051.safetensors", + "model.layers.19.self_attn.param_sink_key": 
"model-00020-of-000051.safetensors", + "model.layers.19.self_attn.param_sink_value": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.gate.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.e_score_correction_bias": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.0.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.5.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.12.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.15.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.20.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.27.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.30.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.34.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.42.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.45.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.49.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.56.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.60.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.64.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.64.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.64.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.65.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.65.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.65.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.66.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.66.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.66.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.67.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.67.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.67.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.68.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.68.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.69.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.69.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.69.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.70.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.70.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.70.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.71.gate_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.71.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.71.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.72.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.72.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.72.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.73.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.73.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.73.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.74.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.74.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.74.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.75.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.75.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.75.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.76.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.76.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.76.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.77.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.77.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.77.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.78.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.78.up_proj.weight": "model-00020-of-000051.safetensors", + 
"model.layers.19.mlp.experts.78.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.79.gate_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.79.up_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.19.mlp.experts.79.down_proj.weight": "model-00020-of-000051.safetensors", + "model.layers.20.input_layernorm.weight": "model-00021-of-000051.safetensors", + "model.layers.20.pre_mlp_layernorm.weight": "model-00021-of-000051.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00021-of-000051.safetensors", + "model.layers.20.post_mlp_layernorm.weight": "model-00021-of-000051.safetensors", + "model.layers.20.self_attn.qkv_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.self_attn.k_layernorm.weight": "model-00021-of-000051.safetensors", + "model.layers.20.self_attn.param_sink_key": "model-00021-of-000051.safetensors", + "model.layers.20.self_attn.param_sink_value": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.gate.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.e_score_correction_bias": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.1.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.3.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": 
"model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00021-of-000051.safetensors", 
+ "model.layers.20.mlp.experts.16.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.18.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.24.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.31.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.33.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.38.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.46.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.48.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.53.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.60.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.63.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.64.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.64.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.64.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.65.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.65.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.65.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.66.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.66.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.66.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.67.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.67.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.67.down_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.68.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.68.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.68.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.69.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.69.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.69.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.70.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.70.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.70.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.71.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.71.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.71.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.72.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.72.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.72.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.73.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.73.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.73.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.74.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.74.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.74.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.75.gate_proj.weight": "model-00021-of-000051.safetensors", + 
"model.layers.20.mlp.experts.75.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.75.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.76.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.76.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.76.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.77.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.77.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.77.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.78.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.78.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.78.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.79.gate_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.79.up_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.20.mlp.experts.79.down_proj.weight": "model-00021-of-000051.safetensors", + "model.layers.21.input_layernorm.weight": "model-00022-of-000051.safetensors", + "model.layers.21.pre_mlp_layernorm.weight": "model-00022-of-000051.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00022-of-000051.safetensors", + "model.layers.21.post_mlp_layernorm.weight": "model-00022-of-000051.safetensors", + "model.layers.21.self_attn.qkv_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.self_attn.k_layernorm.weight": "model-00022-of-000051.safetensors", + "model.layers.21.self_attn.param_sink_key": "model-00022-of-000051.safetensors", + "model.layers.21.self_attn.param_sink_value": 
"model-00022-of-000051.safetensors", + "model.layers.21.mlp.gate.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.e_score_correction_bias": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.0.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.5.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.13.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.15.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.20.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.27.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.30.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.35.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.42.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.45.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.49.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.57.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.60.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.64.gate_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.64.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.64.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.65.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.65.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.65.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.66.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.66.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.66.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.67.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.67.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.67.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.68.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.68.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.68.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.69.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.69.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.69.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.70.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.70.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.70.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.71.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.71.up_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.71.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.72.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.72.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.72.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.73.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.73.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.73.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.74.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.74.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.74.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.75.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.75.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.75.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.76.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.76.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.76.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.77.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.77.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.77.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.78.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.78.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.78.down_proj.weight": "model-00022-of-000051.safetensors", + 
"model.layers.21.mlp.experts.79.gate_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.79.up_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.21.mlp.experts.79.down_proj.weight": "model-00022-of-000051.safetensors", + "model.layers.22.input_layernorm.weight": "model-00023-of-000051.safetensors", + "model.layers.22.pre_mlp_layernorm.weight": "model-00023-of-000051.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00023-of-000051.safetensors", + "model.layers.22.post_mlp_layernorm.weight": "model-00023-of-000051.safetensors", + "model.layers.22.self_attn.qkv_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.self_attn.k_layernorm.weight": "model-00023-of-000051.safetensors", + "model.layers.22.self_attn.param_sink_key": "model-00023-of-000051.safetensors", + "model.layers.22.self_attn.param_sink_value": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.gate.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.e_score_correction_bias": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00023-of-000051.safetensors", + 
"model.layers.22.mlp.experts.2.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.3.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.18.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.33.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.48.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.63.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.64.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.64.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.64.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.65.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.65.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.65.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.66.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.66.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.66.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.67.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.67.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.67.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.68.gate_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.68.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.68.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.69.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.69.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.69.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.70.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.70.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.70.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.71.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.71.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.71.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.72.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.72.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.72.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.73.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.73.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.73.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.74.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.74.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.74.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.75.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.75.up_proj.weight": 
"model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.75.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.76.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.76.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.76.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.77.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.77.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.77.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.78.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.78.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.78.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.79.gate_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.79.up_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.22.mlp.experts.79.down_proj.weight": "model-00023-of-000051.safetensors", + "model.layers.23.input_layernorm.weight": "model-00024-of-000051.safetensors", + "model.layers.23.pre_mlp_layernorm.weight": "model-00024-of-000051.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00024-of-000051.safetensors", + "model.layers.23.post_mlp_layernorm.weight": "model-00024-of-000051.safetensors", + "model.layers.23.self_attn.qkv_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.self_attn.k_layernorm.weight": "model-00024-of-000051.safetensors", + "model.layers.23.self_attn.param_sink_key": "model-00024-of-000051.safetensors", + "model.layers.23.self_attn.param_sink_value": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.gate.weight": 
"model-00024-of-000051.safetensors", + "model.layers.23.mlp.e_score_correction_bias": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.0.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.6.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.13.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.15.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.20.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.28.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.30.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.35.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.42.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.45.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.50.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.57.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.60.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.64.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.64.up_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.64.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.65.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.65.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.65.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.66.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.66.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.66.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.67.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.67.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.67.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.68.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.68.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.68.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.69.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.69.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.69.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.70.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.70.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.71.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.71.down_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.72.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.72.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.72.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.73.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.73.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.73.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.74.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.74.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.74.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.75.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.75.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.75.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.76.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.76.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.76.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.77.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.77.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.77.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.78.gate_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.78.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.78.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.79.gate_proj.weight": "model-00024-of-000051.safetensors", + 
"model.layers.23.mlp.experts.79.up_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.23.mlp.experts.79.down_proj.weight": "model-00024-of-000051.safetensors", + "model.layers.24.input_layernorm.weight": "model-00025-of-000051.safetensors", + "model.layers.24.pre_mlp_layernorm.weight": "model-00025-of-000051.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00025-of-000051.safetensors", + "model.layers.24.post_mlp_layernorm.weight": "model-00025-of-000051.safetensors", + "model.layers.24.self_attn.qkv_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.self_attn.k_layernorm.weight": "model-00025-of-000051.safetensors", + "model.layers.24.self_attn.param_sink_key": "model-00025-of-000051.safetensors", + "model.layers.24.self_attn.param_sink_value": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.gate.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.e_score_correction_bias": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00025-of-000051.safetensors", + 
"model.layers.24.mlp.experts.2.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.3.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.18.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.33.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.48.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.63.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.64.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.64.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.64.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.65.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.65.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.65.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.66.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.66.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.66.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.67.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.67.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.67.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.68.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.68.up_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.68.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.69.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.69.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.69.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.70.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.70.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.70.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.71.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.71.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.71.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.72.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.72.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.72.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.73.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.73.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.73.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.74.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.74.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.74.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.75.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.75.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.75.down_proj.weight": 
"model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.76.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.76.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.76.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.77.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.77.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.77.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.78.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.78.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.78.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.79.gate_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.79.up_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.24.mlp.experts.79.down_proj.weight": "model-00025-of-000051.safetensors", + "model.layers.25.input_layernorm.weight": "model-00026-of-000051.safetensors", + "model.layers.25.pre_mlp_layernorm.weight": "model-00026-of-000051.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00026-of-000051.safetensors", + "model.layers.25.post_mlp_layernorm.weight": "model-00026-of-000051.safetensors", + "model.layers.25.self_attn.qkv_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.self_attn.k_layernorm.weight": "model-00026-of-000051.safetensors", + "model.layers.25.self_attn.param_sink_key": "model-00026-of-000051.safetensors", + "model.layers.25.self_attn.param_sink_value": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.gate.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.e_score_correction_bias": 
"model-00026-of-000051.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.0.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.6.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.13.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.15.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.21.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.28.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.30.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.35.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.43.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.45.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.50.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.57.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.60.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.64.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.64.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.64.down_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.65.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.65.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.65.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.66.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.66.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.66.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.67.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.67.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.67.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.68.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.68.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.68.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.69.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.69.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.69.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.70.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.70.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.70.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.71.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.71.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.71.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.72.gate_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.72.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.72.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.73.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.73.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.73.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.74.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.74.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.74.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.75.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.75.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.75.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.76.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.76.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.76.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.77.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.77.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.77.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.78.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.78.up_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.78.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.79.gate_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.25.mlp.experts.79.up_proj.weight": "model-00026-of-000051.safetensors", + 
"model.layers.25.mlp.experts.79.down_proj.weight": "model-00026-of-000051.safetensors", + "model.layers.26.input_layernorm.weight": "model-00027-of-000051.safetensors", + "model.layers.26.pre_mlp_layernorm.weight": "model-00027-of-000051.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00027-of-000051.safetensors", + "model.layers.26.post_mlp_layernorm.weight": "model-00027-of-000051.safetensors", + "model.layers.26.self_attn.qkv_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.self_attn.k_layernorm.weight": "model-00027-of-000051.safetensors", + "model.layers.26.self_attn.param_sink_key": "model-00027-of-000051.safetensors", + "model.layers.26.self_attn.param_sink_value": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.gate.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.e_score_correction_bias": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00027-of-000051.safetensors", + 
"model.layers.26.mlp.experts.2.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.3.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.18.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.33.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.48.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.63.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.64.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.64.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.64.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.65.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.65.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.65.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.66.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.66.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.66.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.67.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.67.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.67.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.68.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.68.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.68.down_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.69.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.69.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.69.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.70.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.70.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.70.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.71.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.71.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.71.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.72.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.72.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.72.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.73.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.73.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.73.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.74.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.74.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.74.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.75.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.75.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.75.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.76.gate_proj.weight": 
"model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.76.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.76.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.77.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.77.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.77.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.78.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.78.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.78.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.79.gate_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.79.up_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.26.mlp.experts.79.down_proj.weight": "model-00027-of-000051.safetensors", + "model.layers.27.input_layernorm.weight": "model-00028-of-000051.safetensors", + "model.layers.27.pre_mlp_layernorm.weight": "model-00028-of-000051.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00028-of-000051.safetensors", + "model.layers.27.post_mlp_layernorm.weight": "model-00028-of-000051.safetensors", + "model.layers.27.self_attn.qkv_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.self_attn.k_layernorm.weight": "model-00028-of-000051.safetensors", + "model.layers.27.self_attn.param_sink_key": "model-00028-of-000051.safetensors", + "model.layers.27.self_attn.param_sink_value": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.gate.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.e_score_correction_bias": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": 
"model-00028-of-000051.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.0.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.6.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.14.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.15.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.21.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.28.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.30.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.36.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.43.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.45.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.50.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.58.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.60.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.64.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.64.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.64.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.65.gate_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.65.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.65.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.66.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.66.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.66.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.67.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.67.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.67.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.68.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.68.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.68.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.69.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.69.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.69.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.70.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.70.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.70.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.71.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.71.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.72.up_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.27.mlp.experts.72.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.73.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.73.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.73.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.74.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.74.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.74.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.75.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.75.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.75.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.76.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.76.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.76.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.77.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.77.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.77.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.78.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.78.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.78.down_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.79.gate_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.79.up_proj.weight": "model-00028-of-000051.safetensors", + "model.layers.27.mlp.experts.79.down_proj.weight": "model-00028-of-000051.safetensors", + 
"model.layers.28.input_layernorm.weight": "model-00029-of-000051.safetensors", + "model.layers.28.pre_mlp_layernorm.weight": "model-00029-of-000051.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00029-of-000051.safetensors", + "model.layers.28.post_mlp_layernorm.weight": "model-00029-of-000051.safetensors", + "model.layers.28.self_attn.qkv_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.self_attn.k_layernorm.weight": "model-00029-of-000051.safetensors", + "model.layers.28.self_attn.param_sink_key": "model-00029-of-000051.safetensors", + "model.layers.28.self_attn.param_sink_value": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.gate.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.e_score_correction_bias": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.0.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.0.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.1.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.1.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.2.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.2.down_proj.weight": "model-00029-of-000051.safetensors", + 
"model.layers.28.mlp.experts.3.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.3.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.3.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.4.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.4.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.5.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.5.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.6.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.6.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.7.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.7.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.8.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.8.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.9.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.9.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.10.up_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.10.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.11.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.11.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.12.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.12.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.13.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.13.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.14.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.14.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.15.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.15.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.16.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.16.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.17.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.17.down_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.18.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.18.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.19.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.19.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.20.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.20.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.21.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.21.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.22.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.22.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.23.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.23.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.24.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.24.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.25.gate_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.25.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.25.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.26.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.26.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.27.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.27.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.28.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.28.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.29.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.29.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.30.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.30.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.31.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.31.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.32.up_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.32.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.33.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.33.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.34.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.34.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.35.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.35.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.36.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.36.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.37.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.37.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.38.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.38.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.39.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.39.down_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.40.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.40.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.41.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.41.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.42.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.42.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.43.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.43.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.44.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.44.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.45.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.45.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.46.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.46.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.47.gate_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.47.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.47.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.48.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.48.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.49.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.49.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.50.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.50.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.51.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.51.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.52.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.52.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.53.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.53.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.54.up_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.54.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.55.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.55.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.56.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.56.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.57.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.57.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.58.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.58.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.59.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.59.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.60.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.60.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.61.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.61.down_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.62.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.62.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.63.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.63.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.64.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.64.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.64.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.65.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.65.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.65.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.66.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.66.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.66.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.67.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.67.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.67.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.68.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.68.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.68.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.69.gate_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.69.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.69.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.70.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.70.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.70.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.71.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.71.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.71.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.72.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.72.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.72.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.73.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.73.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.73.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.74.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.74.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.74.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.75.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.75.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.75.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.76.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.76.up_proj.weight": 
"model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.76.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.77.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.77.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.77.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.78.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.78.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.78.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.79.gate_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.79.up_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.28.mlp.experts.79.down_proj.weight": "model-00029-of-000051.safetensors", + "model.layers.29.input_layernorm.weight": "model-00030-of-000051.safetensors", + "model.layers.29.pre_mlp_layernorm.weight": "model-00030-of-000051.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00030-of-000051.safetensors", + "model.layers.29.post_mlp_layernorm.weight": "model-00030-of-000051.safetensors", + "model.layers.29.self_attn.qkv_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.self_attn.k_layernorm.weight": "model-00030-of-000051.safetensors", + "model.layers.29.self_attn.param_sink_key": "model-00030-of-000051.safetensors", + "model.layers.29.self_attn.param_sink_value": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.gate.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.e_score_correction_bias": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": 
"model-00030-of-000051.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.0.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.0.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.1.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.1.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.2.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.2.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.3.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.3.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.4.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.4.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.5.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.5.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.6.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.6.down_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.7.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.7.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.7.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.8.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.8.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.9.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.9.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.10.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.10.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.11.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.11.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.12.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.12.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.13.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.13.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.14.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.14.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.15.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.15.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.16.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.16.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.17.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.17.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.18.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.18.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.19.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.19.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.20.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.20.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.21.up_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.21.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.22.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.22.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.22.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.23.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.23.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.24.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.24.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.25.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.25.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.26.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.26.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.27.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.27.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.28.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.28.down_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.29.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.29.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.29.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.30.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.30.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.31.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.31.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.32.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.32.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.33.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.33.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.34.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.34.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.35.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.35.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.36.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.36.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.37.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.37.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.37.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.38.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.38.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.39.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.39.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.40.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.40.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.41.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.41.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.42.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.42.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.43.up_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.43.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.44.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.44.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.45.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.45.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.46.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.46.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.47.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.47.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.48.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.48.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.49.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.49.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.50.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.50.down_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.51.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.51.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.51.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.52.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.52.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.52.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.53.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.53.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.54.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.54.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.55.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.55.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.56.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.56.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.57.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.57.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.58.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.58.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.59.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.59.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.60.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.60.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.61.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.61.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.62.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.62.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.63.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.63.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.64.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.64.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.64.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.65.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.65.up_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.65.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.66.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.66.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.66.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.67.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.67.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.67.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.68.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.68.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.68.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.69.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.69.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.69.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.70.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.70.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.70.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.71.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.71.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.71.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.72.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.72.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.72.down_proj.weight": "model-00030-of-000051.safetensors", + 
"model.layers.29.mlp.experts.73.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.73.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.73.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.74.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.74.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.74.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.75.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.75.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.75.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.76.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.76.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.76.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.77.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.77.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.77.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.78.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.78.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.78.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.79.gate_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.79.up_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.29.mlp.experts.79.down_proj.weight": "model-00030-of-000051.safetensors", + "model.layers.30.input_layernorm.weight": "model-00031-of-000051.safetensors", + 
"model.layers.30.pre_mlp_layernorm.weight": "model-00031-of-000051.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00031-of-000051.safetensors", + "model.layers.30.post_mlp_layernorm.weight": "model-00031-of-000051.safetensors", + "model.layers.30.self_attn.qkv_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.self_attn.k_layernorm.weight": "model-00031-of-000051.safetensors", + "model.layers.30.self_attn.param_sink_key": "model-00031-of-000051.safetensors", + "model.layers.30.self_attn.param_sink_value": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.gate.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.e_score_correction_bias": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.0.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.0.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.1.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.1.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.2.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.2.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00031-of-000051.safetensors", 
+ "model.layers.30.mlp.experts.3.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.3.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.4.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.4.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.5.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.5.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.6.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.6.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.7.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.7.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.8.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.8.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.9.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.9.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.10.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.10.down_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.11.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.11.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.12.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.12.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.13.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.13.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.14.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.14.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.15.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.15.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.16.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.16.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.17.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.17.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.18.gate_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.18.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.18.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.19.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.19.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.20.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.20.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.21.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.21.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.22.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.22.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.23.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.23.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.24.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.24.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.25.up_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.25.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.26.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.26.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.27.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.27.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.28.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.28.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.29.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.29.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.30.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.30.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.31.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.31.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.32.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.32.down_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.33.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.33.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.34.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.34.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.35.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.35.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.36.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.36.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.37.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.37.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.38.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.38.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.39.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.39.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.40.gate_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.40.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.40.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.41.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.41.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.42.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.42.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.43.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.43.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.44.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.44.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.45.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.45.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.46.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.46.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.47.up_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.47.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.48.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.48.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.49.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.49.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.50.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.50.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.51.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.51.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.52.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.52.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.53.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.53.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.54.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.54.down_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.55.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.55.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.56.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.56.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.57.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.57.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.58.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.58.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.59.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.59.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.60.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.60.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.61.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.61.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.62.gate_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.62.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.62.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.63.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.63.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.64.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.64.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.64.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.65.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.65.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.65.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.66.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.66.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.66.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.67.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.67.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.67.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.68.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.68.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.68.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.69.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.69.up_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.69.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.70.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.70.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.70.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.71.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.71.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.71.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.72.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.72.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.72.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.73.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.73.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.73.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.74.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.74.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.74.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.75.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.75.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.75.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.76.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.76.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.76.down_proj.weight": 
"model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.77.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.77.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.77.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.78.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.78.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.78.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.79.gate_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.79.up_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.30.mlp.experts.79.down_proj.weight": "model-00031-of-000051.safetensors", + "model.layers.31.input_layernorm.weight": "model-00032-of-000051.safetensors", + "model.layers.31.pre_mlp_layernorm.weight": "model-00032-of-000051.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00032-of-000051.safetensors", + "model.layers.31.post_mlp_layernorm.weight": "model-00032-of-000051.safetensors", + "model.layers.31.self_attn.qkv_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.self_attn.k_layernorm.weight": "model-00032-of-000051.safetensors", + "model.layers.31.self_attn.param_sink_key": "model-00032-of-000051.safetensors", + "model.layers.31.self_attn.param_sink_value": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.gate.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.e_score_correction_bias": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.shared_experts.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.0.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.0.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.1.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.1.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.2.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.2.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.3.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.3.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.4.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.4.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.5.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.5.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.6.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.6.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.7.gate_proj.weight": 
"model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.7.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.7.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.8.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.8.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.9.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.9.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.10.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.10.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.11.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.11.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.12.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.12.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.13.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.13.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.14.up_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.14.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.15.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.15.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.16.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.16.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.17.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.17.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.18.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.18.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.19.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.19.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.20.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.20.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.21.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.21.down_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.22.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.22.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.22.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.23.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.23.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.24.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.24.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.25.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.25.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.26.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.26.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.27.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.27.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.28.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.28.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.29.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.29.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.30.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.30.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.31.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.31.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.32.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.32.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.33.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.33.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.34.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.34.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.35.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.35.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.36.up_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.36.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.37.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.37.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.37.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.38.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.38.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.39.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.39.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.40.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.40.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.41.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.41.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.42.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.42.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.43.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.43.down_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.44.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.44.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.44.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.45.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.45.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.46.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.46.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.47.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.47.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.48.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.48.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.49.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.49.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.50.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.50.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.51.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.51.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.52.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.52.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.52.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.53.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.53.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.54.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.54.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.55.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.55.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.56.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.56.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.57.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.57.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.58.up_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.58.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.59.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.59.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.60.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.60.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.61.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.61.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.62.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.62.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.63.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.63.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.64.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.64.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.64.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.65.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.65.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.65.down_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.66.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.66.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.66.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.67.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.67.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.67.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.68.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.68.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.68.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.69.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.69.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.69.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.70.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.70.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.70.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.71.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.71.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.72.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.72.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.73.gate_proj.weight": "model-00032-of-000051.safetensors", + 
"model.layers.31.mlp.experts.73.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.73.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.74.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.74.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.74.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.75.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.75.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.75.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.76.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.76.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.76.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.77.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.77.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.77.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.78.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.78.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.78.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.79.gate_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.79.up_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.31.mlp.experts.79.down_proj.weight": "model-00032-of-000051.safetensors", + "model.layers.32.input_layernorm.weight": "model-00033-of-000051.safetensors", + "model.layers.32.pre_mlp_layernorm.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.post_attention_layernorm.weight": "model-00033-of-000051.safetensors", + "model.layers.32.post_mlp_layernorm.weight": "model-00033-of-000051.safetensors", + "model.layers.32.self_attn.qkv_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.self_attn.k_layernorm.weight": "model-00033-of-000051.safetensors", + "model.layers.32.self_attn.param_sink_key": "model-00033-of-000051.safetensors", + "model.layers.32.self_attn.param_sink_value": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.gate.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.e_score_correction_bias": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.0.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.0.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.1.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.1.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.2.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.2.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.3.up_proj.weight": 
"model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.3.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.4.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.4.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.5.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.5.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.6.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.6.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.7.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.7.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.8.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.8.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.9.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.9.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.10.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.10.down_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.11.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.11.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.11.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.12.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.12.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.13.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.13.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.14.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.14.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.15.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.15.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.16.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.16.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.17.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.17.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.18.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.18.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.19.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.19.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.20.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.20.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.21.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.21.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.22.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.22.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.23.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.23.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.24.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.24.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.25.up_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.25.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.26.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.26.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.27.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.27.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.28.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.28.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.29.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.29.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.30.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.30.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.31.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.31.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.32.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.32.down_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.33.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.33.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.33.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.34.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.34.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.35.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.35.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.36.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.36.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.37.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.37.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.38.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.38.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.39.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.39.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.40.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.40.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.41.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.41.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.42.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.42.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.43.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.43.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.44.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.44.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.45.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.45.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.46.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.46.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.47.up_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.47.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.48.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.48.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.49.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.49.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.50.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.50.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.51.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.51.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.52.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.52.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.53.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.53.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.54.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.54.down_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.55.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.55.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.55.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.56.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.56.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.57.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.57.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.58.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.58.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.59.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.59.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.60.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.60.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.61.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.61.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.62.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.62.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.63.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.63.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.64.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.64.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.64.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.65.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.65.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.65.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.66.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.66.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.66.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.67.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.67.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.67.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.68.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.68.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.68.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.69.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.69.up_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.69.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.70.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.70.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.70.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.71.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.71.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.71.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.72.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.72.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.72.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.73.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.73.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.73.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.74.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.74.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.74.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.75.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.75.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.75.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.76.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.76.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.76.down_proj.weight": "model-00033-of-000051.safetensors", + 
"model.layers.32.mlp.experts.77.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.77.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.77.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.78.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.78.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.78.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.79.gate_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.79.up_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.32.mlp.experts.79.down_proj.weight": "model-00033-of-000051.safetensors", + "model.layers.33.input_layernorm.weight": "model-00034-of-000051.safetensors", + "model.layers.33.pre_mlp_layernorm.weight": "model-00034-of-000051.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00034-of-000051.safetensors", + "model.layers.33.post_mlp_layernorm.weight": "model-00034-of-000051.safetensors", + "model.layers.33.self_attn.qkv_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.self_attn.k_layernorm.weight": "model-00034-of-000051.safetensors", + "model.layers.33.self_attn.param_sink_key": "model-00034-of-000051.safetensors", + "model.layers.33.self_attn.param_sink_value": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.gate.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.e_score_correction_bias": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": 
"model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.0.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.0.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.1.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.1.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.2.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.2.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.3.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.3.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.4.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.4.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.5.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.5.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.6.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.6.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.7.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.7.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.8.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.8.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.9.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.9.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.10.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.10.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.11.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.11.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.12.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.12.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.13.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.13.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.14.up_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.14.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.15.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.15.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.16.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.16.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.17.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.17.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.18.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.18.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.19.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.19.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.20.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.20.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.21.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.21.down_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.22.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.22.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.22.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.23.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.23.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.24.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.24.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.25.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.25.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.26.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.26.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.27.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.27.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.28.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.28.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.29.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.29.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.30.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.30.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.31.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.31.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.32.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.32.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.33.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.33.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.34.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.34.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.35.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.35.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.36.up_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.36.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.37.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.37.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.37.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.38.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.38.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.39.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.39.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.40.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.40.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.41.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.41.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.42.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.42.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.43.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.43.down_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.44.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.44.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.44.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.45.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.45.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.46.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.46.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.47.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.47.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.48.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.48.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.49.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.49.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.50.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.50.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.51.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.51.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.52.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.52.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.52.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.53.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.53.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.54.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.54.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.55.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.55.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.56.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.56.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.57.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.57.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.58.up_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.58.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.59.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.59.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.60.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.60.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.61.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.61.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.62.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.62.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.63.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.63.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.64.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.64.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.64.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.65.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.65.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.65.down_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.66.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.66.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.66.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.67.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.67.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.67.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.68.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.68.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.68.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.69.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.69.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.69.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.70.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.70.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.70.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.71.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.71.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.71.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.72.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.72.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.72.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.73.gate_proj.weight": "model-00034-of-000051.safetensors", + 
"model.layers.33.mlp.experts.73.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.73.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.74.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.74.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.74.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.75.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.75.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.75.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.76.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.76.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.76.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.77.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.77.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.77.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.78.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.78.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.78.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.79.gate_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.79.up_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.33.mlp.experts.79.down_proj.weight": "model-00034-of-000051.safetensors", + "model.layers.34.input_layernorm.weight": "model-00035-of-000051.safetensors", + "model.layers.34.pre_mlp_layernorm.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.post_attention_layernorm.weight": "model-00035-of-000051.safetensors", + "model.layers.34.post_mlp_layernorm.weight": "model-00035-of-000051.safetensors", + "model.layers.34.self_attn.qkv_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.self_attn.k_layernorm.weight": "model-00035-of-000051.safetensors", + "model.layers.34.self_attn.param_sink_key": "model-00035-of-000051.safetensors", + "model.layers.34.self_attn.param_sink_value": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.gate.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.e_score_correction_bias": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.0.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.0.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.1.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.1.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.2.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.2.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.3.up_proj.weight": 
"model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.3.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.4.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.4.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.5.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.5.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.6.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.6.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.7.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.7.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.8.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.8.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.9.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.9.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.10.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.10.down_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.11.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.11.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.11.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.12.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.12.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.13.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.13.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.14.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.14.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.15.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.15.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.16.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.16.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.17.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.17.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.18.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.18.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.19.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.19.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.20.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.20.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.21.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.21.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.22.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.22.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.23.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.23.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.24.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.24.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.25.up_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.25.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.26.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.26.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.27.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.27.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.28.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.28.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.29.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.29.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.30.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.30.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.31.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.31.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.32.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.32.down_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.33.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.33.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.33.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.34.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.34.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.35.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.35.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.36.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.36.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.37.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.37.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.38.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.38.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.39.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.39.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.40.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.40.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.41.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.41.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.42.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.42.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.43.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.43.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.44.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.44.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.45.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.45.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.46.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.46.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.47.up_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.47.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.48.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.48.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.49.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.49.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.50.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.50.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.51.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.51.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.52.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.52.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.53.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.53.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.54.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.54.down_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.55.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.55.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.55.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.56.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.56.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.57.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.57.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.58.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.58.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.59.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.59.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.60.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.60.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.61.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.61.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.62.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.62.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.63.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.63.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.64.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.64.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.64.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.65.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.65.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.65.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.66.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.66.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.66.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.67.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.67.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.67.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.68.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.68.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.68.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.69.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.69.up_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.69.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.70.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.70.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.70.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.71.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.71.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.71.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.72.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.72.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.72.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.73.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.73.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.73.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.74.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.74.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.74.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.75.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.75.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.75.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.76.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.76.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.76.down_proj.weight": "model-00035-of-000051.safetensors", + 
"model.layers.34.mlp.experts.77.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.77.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.77.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.78.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.78.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.78.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.79.gate_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.79.up_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.34.mlp.experts.79.down_proj.weight": "model-00035-of-000051.safetensors", + "model.layers.35.input_layernorm.weight": "model-00036-of-000051.safetensors", + "model.layers.35.pre_mlp_layernorm.weight": "model-00036-of-000051.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00036-of-000051.safetensors", + "model.layers.35.post_mlp_layernorm.weight": "model-00036-of-000051.safetensors", + "model.layers.35.self_attn.qkv_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.self_attn.k_layernorm.weight": "model-00036-of-000051.safetensors", + "model.layers.35.self_attn.param_sink_key": "model-00036-of-000051.safetensors", + "model.layers.35.self_attn.param_sink_value": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.gate.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.e_score_correction_bias": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": 
"model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.0.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.0.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.1.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.1.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.2.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.2.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.3.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.3.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.4.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.4.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.5.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.5.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.6.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.6.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.7.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.7.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.8.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.8.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.9.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.9.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.10.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.10.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.11.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.11.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.12.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.12.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.13.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.13.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.14.up_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.14.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.15.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.15.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.16.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.16.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.17.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.17.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.18.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.18.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.19.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.19.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.20.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.20.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.21.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.21.down_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.22.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.22.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.22.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.23.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.23.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.24.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.24.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.25.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.25.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.26.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.26.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.27.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.27.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.28.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.28.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.29.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.29.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.30.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.30.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.31.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.31.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.32.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.32.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.33.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.33.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.34.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.34.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.35.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.35.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.36.up_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.36.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.37.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.37.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.37.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.38.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.38.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.39.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.39.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.40.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.40.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.41.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.41.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.42.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.42.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.43.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.43.down_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.44.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.44.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.44.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.45.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.45.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.46.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.46.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.47.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.47.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.48.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.48.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.49.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.49.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.50.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.50.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.51.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.51.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.52.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.52.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.52.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.53.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.53.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.54.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.54.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.55.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.55.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.56.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.56.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.57.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.57.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.58.up_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.58.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.59.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.59.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.60.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.60.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.61.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.61.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.62.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.62.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.63.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.63.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.64.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.64.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.64.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.65.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.65.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.65.down_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.66.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.66.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.66.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.67.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.67.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.67.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.68.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.68.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.68.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.69.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.69.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.69.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.70.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.70.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.70.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.71.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.71.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.71.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.72.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.72.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.72.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00036-of-000051.safetensors", + 
"model.layers.35.mlp.experts.73.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.73.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.74.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.74.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.75.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.75.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.76.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.76.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.76.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.77.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.77.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.77.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.78.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.78.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.78.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.79.gate_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.79.up_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.35.mlp.experts.79.down_proj.weight": "model-00036-of-000051.safetensors", + "model.layers.36.input_layernorm.weight": "model-00037-of-000051.safetensors", + "model.layers.36.pre_mlp_layernorm.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.post_attention_layernorm.weight": "model-00037-of-000051.safetensors", + "model.layers.36.post_mlp_layernorm.weight": "model-00037-of-000051.safetensors", + "model.layers.36.self_attn.qkv_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.self_attn.k_layernorm.weight": "model-00037-of-000051.safetensors", + "model.layers.36.self_attn.param_sink_key": "model-00037-of-000051.safetensors", + "model.layers.36.self_attn.param_sink_value": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.gate.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.e_score_correction_bias": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.0.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.0.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.1.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.1.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.2.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.2.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.3.up_proj.weight": 
"model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.3.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.4.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.4.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.5.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.5.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.6.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.6.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.7.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.7.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.8.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.8.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.9.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.9.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.10.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.10.down_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.11.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.11.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.11.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.12.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.12.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.13.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.13.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.14.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.14.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.15.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.15.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.16.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.16.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.17.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.17.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.18.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.18.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.19.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.19.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.20.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.20.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.21.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.21.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.22.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.22.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.23.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.23.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.24.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.24.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.25.up_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.25.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.26.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.26.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.27.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.27.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.28.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.28.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.29.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.29.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.30.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.30.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.31.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.31.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.32.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.32.down_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.33.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.33.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.33.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.34.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.34.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.35.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.35.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.36.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.36.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.37.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.37.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.38.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.38.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.39.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.39.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.40.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.40.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.41.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.41.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.42.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.42.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.43.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.43.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.44.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.44.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.45.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.45.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.46.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.46.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.47.up_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.47.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.48.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.48.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.49.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.49.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.50.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.50.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.51.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.51.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.52.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.52.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.53.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.53.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.54.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.54.down_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.55.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.55.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.55.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.56.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.56.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.57.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.57.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.58.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.58.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.59.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.59.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.60.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.60.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.61.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.61.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.62.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.62.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.63.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.63.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.64.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.64.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.64.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.65.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.65.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.65.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.66.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.66.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.66.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.67.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.67.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.67.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.68.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.68.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.68.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.69.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.69.up_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.69.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.70.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.70.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.70.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.71.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.71.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.71.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.72.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.72.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.72.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.73.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.73.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.73.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.74.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.74.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.74.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.75.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.75.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.75.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.76.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.76.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.76.down_proj.weight": "model-00037-of-000051.safetensors", + 
"model.layers.36.mlp.experts.77.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.77.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.77.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.78.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.78.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.78.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.79.gate_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.79.up_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.36.mlp.experts.79.down_proj.weight": "model-00037-of-000051.safetensors", + "model.layers.37.input_layernorm.weight": "model-00038-of-000051.safetensors", + "model.layers.37.pre_mlp_layernorm.weight": "model-00038-of-000051.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00038-of-000051.safetensors", + "model.layers.37.post_mlp_layernorm.weight": "model-00038-of-000051.safetensors", + "model.layers.37.self_attn.qkv_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.self_attn.k_layernorm.weight": "model-00038-of-000051.safetensors", + "model.layers.37.self_attn.param_sink_key": "model-00038-of-000051.safetensors", + "model.layers.37.self_attn.param_sink_value": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.gate.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.e_score_correction_bias": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": 
"model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.0.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.0.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.1.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.1.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.2.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.2.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.3.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.3.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.4.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.4.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.5.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.5.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.6.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.6.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.7.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.7.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.8.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.8.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.9.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.9.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.10.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.10.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.11.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.11.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.12.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.12.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.13.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.13.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.14.up_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.14.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.15.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.15.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.16.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.16.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.17.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.17.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.18.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.18.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.19.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.19.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.20.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.20.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.21.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.21.down_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.22.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.22.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.22.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.23.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.23.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.24.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.24.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.25.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.25.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.26.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.26.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.27.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.27.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.28.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.28.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.29.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.29.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.30.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.30.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.31.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.31.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.32.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.32.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.33.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.33.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.34.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.34.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.35.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.35.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.36.up_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.36.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.37.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.37.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.37.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.38.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.38.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.39.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.39.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.40.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.40.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.41.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.41.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.42.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.42.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.43.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.43.down_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.44.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.44.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.44.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.45.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.45.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.46.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.46.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.47.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.47.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.48.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.48.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.49.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.49.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.50.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.50.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.51.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.51.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.52.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.52.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.52.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.53.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.53.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.54.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.54.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.55.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.55.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.56.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.56.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.57.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.57.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.58.up_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.58.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.59.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.59.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.60.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.60.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.61.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.61.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.62.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.62.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.63.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.63.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.64.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.64.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.64.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.65.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.65.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.65.down_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.66.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.66.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.66.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.67.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.67.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.67.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.68.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.68.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.68.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.69.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.69.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.69.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.70.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.70.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.70.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.71.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.71.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.71.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.72.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.72.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.72.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.73.gate_proj.weight": "model-00038-of-000051.safetensors", + 
"model.layers.37.mlp.experts.73.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.73.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.74.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.74.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.74.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.75.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.75.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.75.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.76.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.76.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.76.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.77.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.77.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.77.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.78.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.78.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.78.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.79.gate_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.79.up_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.37.mlp.experts.79.down_proj.weight": "model-00038-of-000051.safetensors", + "model.layers.38.input_layernorm.weight": "model-00039-of-000051.safetensors", + "model.layers.38.pre_mlp_layernorm.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.post_attention_layernorm.weight": "model-00039-of-000051.safetensors", + "model.layers.38.post_mlp_layernorm.weight": "model-00039-of-000051.safetensors", + "model.layers.38.self_attn.qkv_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.self_attn.k_layernorm.weight": "model-00039-of-000051.safetensors", + "model.layers.38.self_attn.param_sink_key": "model-00039-of-000051.safetensors", + "model.layers.38.self_attn.param_sink_value": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.gate.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.e_score_correction_bias": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.0.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.0.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.1.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.1.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.2.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.2.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.3.up_proj.weight": 
"model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.3.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.4.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.4.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.5.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.5.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.6.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.6.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.7.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.7.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.8.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.8.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.9.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.9.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.10.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.10.down_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.11.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.11.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.11.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.12.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.12.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.13.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.13.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.14.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.14.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.15.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.15.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.16.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.16.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.17.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.17.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.18.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.18.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.19.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.19.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.20.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.20.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.21.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.21.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.22.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.22.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.23.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.23.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.24.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.24.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.25.up_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.25.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.26.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.26.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.27.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.27.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.28.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.28.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.29.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.29.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.30.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.30.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.31.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.31.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.32.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.32.down_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.33.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.33.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.33.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.34.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.34.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.35.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.35.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.36.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.36.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.37.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.37.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.38.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.38.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.39.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.39.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.40.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.40.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.41.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.41.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.42.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.42.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.43.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.43.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.44.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.44.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.45.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.45.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.46.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.46.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.47.up_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.47.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.48.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.48.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.49.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.49.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.50.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.50.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.51.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.51.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.52.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.52.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.53.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.53.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.54.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.54.down_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.55.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.55.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.55.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.56.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.56.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.57.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.57.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.58.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.58.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.59.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.59.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.60.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.60.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.61.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.61.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.62.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.62.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.63.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.63.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.64.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.64.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.64.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.65.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.65.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.65.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.66.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.66.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.66.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.67.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.67.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.67.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.68.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.68.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.68.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.69.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.69.up_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.69.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.70.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.70.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.70.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.71.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.71.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.71.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.72.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.72.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.72.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.73.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.73.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.73.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.74.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.74.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.74.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.75.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.75.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.75.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.76.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.76.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.76.down_proj.weight": "model-00039-of-000051.safetensors", + 
"model.layers.38.mlp.experts.77.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.77.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.77.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.78.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.78.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.78.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.79.gate_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.79.up_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.38.mlp.experts.79.down_proj.weight": "model-00039-of-000051.safetensors", + "model.layers.39.input_layernorm.weight": "model-00040-of-000051.safetensors", + "model.layers.39.pre_mlp_layernorm.weight": "model-00040-of-000051.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00040-of-000051.safetensors", + "model.layers.39.post_mlp_layernorm.weight": "model-00040-of-000051.safetensors", + "model.layers.39.self_attn.qkv_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.self_attn.k_layernorm.weight": "model-00040-of-000051.safetensors", + "model.layers.39.self_attn.param_sink_key": "model-00040-of-000051.safetensors", + "model.layers.39.self_attn.param_sink_value": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.gate.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.e_score_correction_bias": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": 
"model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.0.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.0.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.1.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.1.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.2.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.2.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.3.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.3.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.4.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.4.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.5.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.5.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.6.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.6.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.7.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.7.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.8.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.8.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.9.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.9.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.10.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.10.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.11.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.11.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.12.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.12.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.13.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.13.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.14.up_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.14.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.15.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.15.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.16.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.16.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.17.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.17.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.18.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.18.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.19.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.19.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.20.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.20.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.21.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.21.down_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.22.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.22.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.22.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.23.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.23.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.24.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.24.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.25.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.25.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.26.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.26.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.27.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.27.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.28.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.28.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.29.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.29.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.30.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.30.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.31.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.31.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.32.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.32.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.33.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.33.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.34.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.34.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.35.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.35.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.36.up_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.36.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.37.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.37.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.37.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.38.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.38.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.39.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.39.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.40.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.40.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.41.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.41.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.42.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.42.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.43.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.43.down_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.44.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.44.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.44.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.45.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.45.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.46.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.46.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.47.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.47.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.48.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.48.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.49.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.49.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.50.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.50.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.51.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.51.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.52.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.52.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.52.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.53.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.53.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.54.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.54.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.55.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.55.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.56.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.56.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.57.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.57.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.58.up_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.58.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.59.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.59.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.60.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.60.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.61.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.61.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.62.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.62.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.63.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.63.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.64.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.64.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.64.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.65.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.65.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.65.down_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.66.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.66.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.66.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.67.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.67.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.67.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.68.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.68.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.68.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.69.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.69.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.69.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.70.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.70.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.70.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.71.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.71.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.71.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.72.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.72.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.72.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.73.gate_proj.weight": "model-00040-of-000051.safetensors", + 
"model.layers.39.mlp.experts.73.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.73.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.74.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.74.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.74.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.75.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.75.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.76.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.76.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.77.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.77.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.77.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.78.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.78.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.78.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.79.gate_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.79.up_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.39.mlp.experts.79.down_proj.weight": "model-00040-of-000051.safetensors", + "model.layers.40.input_layernorm.weight": "model-00041-of-000051.safetensors", + "model.layers.40.pre_mlp_layernorm.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.post_attention_layernorm.weight": "model-00041-of-000051.safetensors", + "model.layers.40.post_mlp_layernorm.weight": "model-00041-of-000051.safetensors", + "model.layers.40.self_attn.qkv_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.self_attn.k_layernorm.weight": "model-00041-of-000051.safetensors", + "model.layers.40.self_attn.param_sink_key": "model-00041-of-000051.safetensors", + "model.layers.40.self_attn.param_sink_value": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.gate.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.e_score_correction_bias": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.0.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.0.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.1.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.1.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.2.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.2.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.3.up_proj.weight": 
"model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.3.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.4.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.4.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.5.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.5.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.6.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.6.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.7.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.7.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.8.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.8.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.9.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.9.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.10.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.10.down_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.11.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.11.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.11.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.12.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.12.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.13.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.13.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.14.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.14.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.15.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.15.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.16.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.16.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.17.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.17.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.18.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.18.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.19.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.19.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.20.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.20.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.21.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.21.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.22.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.22.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.23.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.23.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.24.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.24.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.25.up_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.25.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.26.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.26.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.27.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.27.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.28.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.28.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.29.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.29.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.30.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.30.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.31.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.31.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.32.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.32.down_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.33.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.33.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.33.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.34.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.34.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.35.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.35.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.36.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.36.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.37.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.37.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.38.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.38.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.39.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.39.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.40.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.40.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.41.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.41.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.42.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.42.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.43.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.43.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.44.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.44.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.45.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.45.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.46.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.46.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.47.up_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.47.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.48.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.48.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.49.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.49.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.50.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.50.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.51.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.51.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.52.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.52.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.53.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.53.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.54.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.54.down_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.55.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.55.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.55.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.56.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.56.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.57.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.57.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.58.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.58.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.59.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.59.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.60.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.60.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.61.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.61.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.62.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.62.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.63.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.63.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.64.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.64.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.64.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.65.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.65.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.65.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.66.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.66.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.66.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.67.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.67.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.67.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.68.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.68.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.68.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.69.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.69.up_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.69.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.70.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.70.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.70.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.71.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.71.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.71.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.72.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.72.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.72.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.73.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.73.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.73.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.74.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.74.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.74.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.75.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.75.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.75.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.76.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.76.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.76.down_proj.weight": "model-00041-of-000051.safetensors", + 
"model.layers.40.mlp.experts.77.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.77.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.77.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.78.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.78.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.78.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.79.gate_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.79.up_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.40.mlp.experts.79.down_proj.weight": "model-00041-of-000051.safetensors", + "model.layers.41.input_layernorm.weight": "model-00042-of-000051.safetensors", + "model.layers.41.pre_mlp_layernorm.weight": "model-00042-of-000051.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00042-of-000051.safetensors", + "model.layers.41.post_mlp_layernorm.weight": "model-00042-of-000051.safetensors", + "model.layers.41.self_attn.qkv_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.self_attn.k_layernorm.weight": "model-00042-of-000051.safetensors", + "model.layers.41.self_attn.param_sink_key": "model-00042-of-000051.safetensors", + "model.layers.41.self_attn.param_sink_value": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.gate.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.e_score_correction_bias": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": 
"model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.0.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.0.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.1.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.1.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.2.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.2.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.3.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.3.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.4.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.4.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.5.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.5.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.6.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.6.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.7.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.7.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.8.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.8.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.9.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.9.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.10.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.10.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.11.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.11.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.12.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.12.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.13.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.13.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.14.up_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.14.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.15.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.15.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.16.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.16.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.17.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.17.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.18.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.18.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.19.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.19.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.20.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.20.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.21.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.21.down_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.22.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.22.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.22.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.23.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.23.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.24.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.24.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.25.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.25.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.26.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.26.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.27.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.27.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.28.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.28.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.29.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.29.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.30.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.30.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.31.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.31.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.32.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.32.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.33.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.33.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.34.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.34.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.35.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.35.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.36.up_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.36.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.37.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.37.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.37.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.38.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.38.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.39.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.39.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.40.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.40.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.41.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.41.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.42.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.42.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.43.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.43.down_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.44.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.44.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.44.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.45.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.45.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.46.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.46.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.47.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.47.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.48.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.48.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.49.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.49.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.50.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.50.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.51.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.51.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.52.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.52.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.52.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.53.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.53.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.54.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.54.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.55.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.55.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.56.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.56.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.57.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.57.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.58.up_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.58.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.59.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.59.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.60.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.60.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.61.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.61.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.62.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.62.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.63.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.63.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.64.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.64.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.64.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.65.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.65.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.65.down_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.66.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.66.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.66.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.67.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.67.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.67.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.68.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.68.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.68.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.69.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.69.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.69.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.70.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.70.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.70.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.71.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.71.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.71.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.72.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.72.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.72.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.73.gate_proj.weight": "model-00042-of-000051.safetensors", + 
"model.layers.41.mlp.experts.73.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.73.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.74.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.74.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.74.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.75.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.75.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.75.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.76.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.76.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.76.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.77.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.77.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.77.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.78.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.78.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.78.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.79.gate_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.79.up_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.41.mlp.experts.79.down_proj.weight": "model-00042-of-000051.safetensors", + "model.layers.42.input_layernorm.weight": "model-00043-of-000051.safetensors", + "model.layers.42.pre_mlp_layernorm.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.post_attention_layernorm.weight": "model-00043-of-000051.safetensors", + "model.layers.42.post_mlp_layernorm.weight": "model-00043-of-000051.safetensors", + "model.layers.42.self_attn.qkv_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.self_attn.k_layernorm.weight": "model-00043-of-000051.safetensors", + "model.layers.42.self_attn.param_sink_key": "model-00043-of-000051.safetensors", + "model.layers.42.self_attn.param_sink_value": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.gate.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.e_score_correction_bias": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.0.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.0.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.1.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.1.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.2.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.2.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.3.up_proj.weight": 
"model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.3.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.4.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.4.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.5.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.5.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.6.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.6.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.7.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.7.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.8.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.8.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.9.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.9.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.10.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.10.down_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.11.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.11.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.11.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.12.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.12.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.13.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.13.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.14.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.14.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.15.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.15.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.16.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.16.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.17.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.17.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.18.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.18.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.19.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.19.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.20.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.20.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.21.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.21.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.22.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.22.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.23.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.23.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.24.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.24.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.25.up_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.25.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.26.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.26.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.27.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.27.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.28.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.28.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.29.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.29.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.30.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.30.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.31.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.31.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.32.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.32.down_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.33.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.33.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.33.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.34.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.34.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.35.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.35.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.36.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.36.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.37.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.37.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.38.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.38.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.39.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.39.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.40.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.40.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.41.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.41.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.42.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.42.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.43.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.43.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.44.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.44.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.45.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.45.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.46.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.46.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.47.up_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.47.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.48.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.48.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.49.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.49.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.50.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.50.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.51.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.51.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.52.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.52.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.53.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.53.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.54.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.54.down_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.55.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.55.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.55.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.56.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.56.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.57.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.57.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.58.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.58.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.59.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.59.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.60.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.60.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.61.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.61.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.62.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.62.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.63.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.63.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.64.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.64.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.64.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.65.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.65.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.65.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.66.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.66.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.66.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.67.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.67.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.67.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.68.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.68.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.68.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.69.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.69.up_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.69.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.70.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.70.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.70.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.71.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.71.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.71.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.72.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.72.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.72.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.73.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.73.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.73.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.74.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.74.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.74.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.75.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.75.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.75.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.76.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.76.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.76.down_proj.weight": "model-00043-of-000051.safetensors", + 
"model.layers.42.mlp.experts.77.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.77.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.77.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.78.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.78.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.78.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.79.gate_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.79.up_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.42.mlp.experts.79.down_proj.weight": "model-00043-of-000051.safetensors", + "model.layers.43.input_layernorm.weight": "model-00044-of-000051.safetensors", + "model.layers.43.pre_mlp_layernorm.weight": "model-00044-of-000051.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00044-of-000051.safetensors", + "model.layers.43.post_mlp_layernorm.weight": "model-00044-of-000051.safetensors", + "model.layers.43.self_attn.qkv_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.self_attn.k_layernorm.weight": "model-00044-of-000051.safetensors", + "model.layers.43.self_attn.param_sink_key": "model-00044-of-000051.safetensors", + "model.layers.43.self_attn.param_sink_value": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.gate.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.e_score_correction_bias": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": 
"model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.0.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.0.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.1.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.1.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.2.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.2.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.3.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.3.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.4.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.4.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.5.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.5.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.6.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.6.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.7.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.7.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.8.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.8.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.9.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.9.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.10.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.10.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.11.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.11.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.12.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.12.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.13.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.13.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.14.up_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.14.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.15.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.15.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.16.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.16.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.17.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.17.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.18.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.18.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.19.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.19.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.20.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.20.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.21.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.21.down_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.22.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.22.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.22.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.23.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.23.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.24.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.24.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.25.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.25.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.26.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.26.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.27.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.27.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.28.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.28.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.29.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.29.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.30.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.30.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.31.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.31.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.32.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.32.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.33.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.33.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.34.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.34.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.35.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.35.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.36.up_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.36.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.37.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.37.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.37.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.38.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.38.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.39.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.39.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.40.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.40.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.41.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.41.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.42.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.42.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.43.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.43.down_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.44.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.44.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.44.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.45.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.45.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.46.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.46.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.47.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.47.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.48.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.48.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.49.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.49.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.50.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.50.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.51.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.51.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.52.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.52.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.52.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.53.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.53.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.54.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.54.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.55.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.55.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.56.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.56.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.57.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.57.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.58.up_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.58.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.59.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.59.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.60.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.60.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.61.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.61.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.62.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.62.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.63.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.63.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.64.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.64.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.64.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.65.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.65.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.65.down_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.66.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.66.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.66.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.67.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.67.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.67.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.68.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.68.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.68.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.69.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.69.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.69.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.70.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.70.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.70.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.71.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.71.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.71.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.72.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.72.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.72.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.73.gate_proj.weight": "model-00044-of-000051.safetensors", + 
"model.layers.43.mlp.experts.73.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.73.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.74.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.74.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.74.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.75.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.75.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.76.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.76.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.77.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.77.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.77.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.78.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.78.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.78.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.79.gate_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.79.up_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.43.mlp.experts.79.down_proj.weight": "model-00044-of-000051.safetensors", + "model.layers.44.input_layernorm.weight": "model-00045-of-000051.safetensors", + "model.layers.44.pre_mlp_layernorm.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.post_attention_layernorm.weight": "model-00045-of-000051.safetensors", + "model.layers.44.post_mlp_layernorm.weight": "model-00045-of-000051.safetensors", + "model.layers.44.self_attn.qkv_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.self_attn.k_layernorm.weight": "model-00045-of-000051.safetensors", + "model.layers.44.self_attn.param_sink_key": "model-00045-of-000051.safetensors", + "model.layers.44.self_attn.param_sink_value": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.gate.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.e_score_correction_bias": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.0.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.0.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.1.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.1.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.2.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.2.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.3.up_proj.weight": 
"model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.3.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.4.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.4.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.5.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.5.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.6.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.6.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.7.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.7.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.8.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.8.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.9.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.9.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.10.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.10.down_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.11.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.11.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.11.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.12.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.12.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.13.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.13.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.14.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.14.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.15.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.15.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.16.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.16.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.17.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.17.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.18.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.18.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.19.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.19.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.20.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.20.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.21.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.21.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.22.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.22.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.23.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.23.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.24.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.24.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.25.up_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.25.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.26.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.26.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.27.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.27.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.28.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.28.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.29.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.29.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.30.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.30.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.31.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.31.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.32.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.32.down_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.33.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.33.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.33.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.34.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.34.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.35.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.35.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.36.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.36.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.37.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.37.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.38.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.38.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.39.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.39.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.40.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.40.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.41.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.41.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.42.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.42.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.43.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.43.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.44.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.44.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.45.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.45.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.46.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.46.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.47.up_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.47.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.48.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.48.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.49.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.49.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.50.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.50.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.51.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.51.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.52.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.52.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.53.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.53.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.54.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.54.down_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.55.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.55.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.55.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.56.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.56.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.57.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.57.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.58.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.58.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.59.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.59.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.60.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.60.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.61.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.61.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.62.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.62.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.63.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.63.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.64.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.64.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.64.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.65.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.65.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.65.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.66.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.66.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.66.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.67.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.67.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.67.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.68.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.68.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.68.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.69.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.69.up_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.69.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.70.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.70.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.70.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.71.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.71.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.71.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.72.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.72.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.72.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.73.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.73.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.73.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.74.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.74.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.74.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.75.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.75.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.75.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.76.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.76.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.76.down_proj.weight": "model-00045-of-000051.safetensors", + 
"model.layers.44.mlp.experts.77.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.77.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.77.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.78.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.78.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.78.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.79.gate_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.79.up_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.44.mlp.experts.79.down_proj.weight": "model-00045-of-000051.safetensors", + "model.layers.45.input_layernorm.weight": "model-00046-of-000051.safetensors", + "model.layers.45.pre_mlp_layernorm.weight": "model-00046-of-000051.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00046-of-000051.safetensors", + "model.layers.45.post_mlp_layernorm.weight": "model-00046-of-000051.safetensors", + "model.layers.45.self_attn.qkv_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.self_attn.k_layernorm.weight": "model-00046-of-000051.safetensors", + "model.layers.45.self_attn.param_sink_key": "model-00046-of-000051.safetensors", + "model.layers.45.self_attn.param_sink_value": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.gate.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.e_score_correction_bias": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": 
"model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.0.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.0.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.1.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.1.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.2.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.2.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.3.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.3.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.4.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.4.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.5.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.5.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.6.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.6.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.7.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.7.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.8.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.8.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.9.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.9.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.10.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.10.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.11.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.11.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.12.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.12.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.13.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.13.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.14.up_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.14.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.15.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.15.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.16.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.16.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.17.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.17.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.18.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.18.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.19.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.19.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.20.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.20.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.21.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.21.down_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.22.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.22.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.22.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.23.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.23.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.24.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.24.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.25.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.25.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.26.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.26.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.27.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.27.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.28.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.28.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.29.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.29.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.30.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.30.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.31.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.31.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.32.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.32.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.33.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.33.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.34.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.34.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.35.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.35.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.36.up_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.36.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.37.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.37.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.37.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.38.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.38.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.39.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.39.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.40.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.40.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.41.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.41.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.42.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.42.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.43.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.43.down_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.44.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.44.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.44.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.45.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.45.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.46.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.46.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.47.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.47.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.48.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.48.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.49.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.49.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.50.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.50.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.51.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.51.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.52.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.52.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.52.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.53.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.53.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.54.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.54.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.55.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.55.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.56.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.56.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.57.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.57.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.58.up_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.58.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.59.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.59.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.60.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.60.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.61.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.61.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.62.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.62.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.63.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.63.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.64.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.64.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.64.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.65.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.65.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.65.down_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.66.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.66.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.66.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.67.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.67.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.67.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.68.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.68.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.68.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.69.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.69.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.69.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.70.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.70.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.70.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.71.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.71.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.71.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.72.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.72.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.72.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.73.gate_proj.weight": "model-00046-of-000051.safetensors", + 
"model.layers.45.mlp.experts.73.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.73.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.74.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.74.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.74.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.75.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.75.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.75.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.76.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.76.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.76.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.77.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.77.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.77.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.78.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.78.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.78.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.79.gate_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.79.up_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.45.mlp.experts.79.down_proj.weight": "model-00046-of-000051.safetensors", + "model.layers.46.input_layernorm.weight": "model-00047-of-000051.safetensors", + "model.layers.46.pre_mlp_layernorm.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.post_attention_layernorm.weight": "model-00047-of-000051.safetensors", + "model.layers.46.post_mlp_layernorm.weight": "model-00047-of-000051.safetensors", + "model.layers.46.self_attn.qkv_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.self_attn.k_layernorm.weight": "model-00047-of-000051.safetensors", + "model.layers.46.self_attn.param_sink_key": "model-00047-of-000051.safetensors", + "model.layers.46.self_attn.param_sink_value": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.gate.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.e_score_correction_bias": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.0.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.0.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.1.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.1.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.2.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.2.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.3.up_proj.weight": 
"model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.3.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.4.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.4.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.5.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.5.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.6.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.6.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.7.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.7.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.8.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.8.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.9.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.9.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.10.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.10.down_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.11.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.11.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.11.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.12.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.12.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.13.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.13.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.14.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.14.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.15.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.15.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.16.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.16.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.17.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.17.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.18.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.18.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.19.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.19.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.20.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.20.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.21.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.21.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.22.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.22.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.23.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.23.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.24.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.24.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.25.up_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.25.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.26.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.26.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.27.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.27.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.28.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.28.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.29.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.29.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.30.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.30.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.31.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.31.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.32.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.32.down_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.33.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.33.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.33.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.34.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.34.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.35.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.35.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.36.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.36.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.37.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.37.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.38.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.38.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.39.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.39.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.40.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.40.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.41.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.41.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.42.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.42.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.43.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.43.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.44.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.44.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.45.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.45.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.46.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.46.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.47.up_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.47.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.48.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.48.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.49.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.49.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.50.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.50.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.51.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.51.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.52.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.52.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.53.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.53.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.54.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.54.down_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.55.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.55.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.55.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.56.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.56.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.57.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.57.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.58.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.58.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.59.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.59.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.60.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.60.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.61.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.61.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.62.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.62.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.63.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.63.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.64.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.64.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.64.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.65.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.65.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.65.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.66.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.66.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.66.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.67.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.67.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.67.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.68.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.68.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.68.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.69.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.69.up_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.69.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.70.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.70.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.70.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.71.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.71.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.71.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.72.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.72.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.72.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.73.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.73.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.73.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.74.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.74.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.74.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.75.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.75.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.75.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.76.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.76.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.76.down_proj.weight": "model-00047-of-000051.safetensors", + 
"model.layers.46.mlp.experts.77.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.77.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.77.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.78.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.78.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.78.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.79.gate_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.79.up_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.46.mlp.experts.79.down_proj.weight": "model-00047-of-000051.safetensors", + "model.layers.47.input_layernorm.weight": "model-00048-of-000051.safetensors", + "model.layers.47.pre_mlp_layernorm.weight": "model-00048-of-000051.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00048-of-000051.safetensors", + "model.layers.47.post_mlp_layernorm.weight": "model-00048-of-000051.safetensors", + "model.layers.47.self_attn.qkv_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.self_attn.k_layernorm.weight": "model-00048-of-000051.safetensors", + "model.layers.47.self_attn.param_sink_key": "model-00048-of-000051.safetensors", + "model.layers.47.self_attn.param_sink_value": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.gate.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.e_score_correction_bias": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": 
"model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.0.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.0.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.1.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.1.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.2.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.2.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.3.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.3.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.4.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.4.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.5.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.5.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.6.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.6.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.7.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.7.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.8.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.8.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.9.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.9.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.10.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.10.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.11.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.11.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.12.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.12.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.13.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.13.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.14.up_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.14.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.15.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.15.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.16.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.16.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.17.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.17.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.18.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.18.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.19.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.19.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.20.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.20.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.21.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.21.down_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.22.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.22.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.22.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.23.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.23.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.24.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.24.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.25.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.25.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.26.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.26.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.27.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.27.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.28.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.28.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.29.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.29.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.30.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.30.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.31.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.31.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.32.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.32.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.33.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.33.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.34.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.34.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.35.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.35.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.36.up_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.36.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.37.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.37.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.37.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.38.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.38.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.39.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.39.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.40.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.40.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.41.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.41.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.42.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.42.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.43.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.43.down_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.44.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.44.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.44.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.45.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.45.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.46.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.46.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.47.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.47.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.48.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.48.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.49.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.49.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.50.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.50.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.51.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.51.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.52.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.52.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.52.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.53.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.53.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.54.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.54.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.55.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.55.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.56.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.56.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.57.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.57.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.58.up_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.58.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.59.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.59.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.60.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.60.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.61.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.61.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.62.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.62.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.63.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.63.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.64.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.64.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.64.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.65.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.65.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.65.down_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.66.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.66.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.66.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.67.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.67.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.67.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.68.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.68.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.68.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.69.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.69.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.69.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.70.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.70.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.70.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.71.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.71.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.71.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.72.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.72.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.72.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.73.gate_proj.weight": "model-00048-of-000051.safetensors", + 
"model.layers.47.mlp.experts.73.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.73.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.74.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.74.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.74.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.75.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.75.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.75.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.76.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.76.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.76.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.77.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.77.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.78.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.78.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.79.up_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.47.mlp.experts.79.down_proj.weight": "model-00048-of-000051.safetensors", + "model.layers.48.input_layernorm.weight": "model-00049-of-000051.safetensors", + "model.layers.48.pre_mlp_layernorm.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.post_attention_layernorm.weight": "model-00049-of-000051.safetensors", + "model.layers.48.post_mlp_layernorm.weight": "model-00049-of-000051.safetensors", + "model.layers.48.self_attn.qkv_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.self_attn.k_layernorm.weight": "model-00049-of-000051.safetensors", + "model.layers.48.self_attn.param_sink_key": "model-00049-of-000051.safetensors", + "model.layers.48.self_attn.param_sink_value": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.gate.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.e_score_correction_bias": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.0.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.0.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.0.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.1.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.1.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.1.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.2.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.2.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.2.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.3.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.3.up_proj.weight": 
"model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.3.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.4.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.4.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.4.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.5.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.5.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.5.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.6.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.6.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.6.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.7.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.7.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.7.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.8.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.8.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.8.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.9.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.9.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.9.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.10.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.10.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.10.down_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.11.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.11.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.11.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.12.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.12.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.12.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.13.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.13.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.13.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.14.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.14.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.14.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.15.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.15.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.15.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.16.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.16.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.16.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.17.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.17.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.17.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.18.gate_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.18.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.18.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.19.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.19.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.19.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.20.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.20.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.20.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.21.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.21.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.21.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.22.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.22.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.22.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.23.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.23.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.23.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.24.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.24.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.24.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.25.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.25.up_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.25.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.26.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.26.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.26.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.27.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.27.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.27.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.28.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.28.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.28.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.29.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.29.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.29.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.30.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.30.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.30.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.31.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.31.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.31.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.32.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.32.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.32.down_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.33.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.33.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.33.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.34.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.34.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.34.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.35.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.35.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.35.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.36.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.36.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.36.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.37.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.37.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.37.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.38.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.38.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.38.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.39.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.39.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.39.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.40.gate_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.40.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.40.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.41.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.41.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.41.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.42.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.42.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.42.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.43.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.43.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.43.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.44.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.44.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.44.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.45.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.45.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.45.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.46.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.46.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.46.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.47.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.47.up_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.47.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.48.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.48.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.48.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.49.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.49.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.49.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.50.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.50.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.50.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.51.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.51.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.51.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.52.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.52.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.52.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.53.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.53.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.53.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.54.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.54.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.54.down_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.55.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.55.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.55.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.56.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.56.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.56.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.57.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.57.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.57.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.58.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.58.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.58.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.59.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.59.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.59.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.60.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.60.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.60.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.61.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.61.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.61.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.62.gate_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.62.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.62.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.63.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.63.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.63.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.64.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.64.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.64.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.65.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.65.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.65.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.66.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.66.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.66.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.67.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.67.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.67.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.68.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.68.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.68.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.69.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.69.up_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.69.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.70.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.70.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.70.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.71.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.71.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.71.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.72.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.72.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.72.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.73.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.73.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.73.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.74.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.74.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.74.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.75.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.75.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.75.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.76.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.76.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.76.down_proj.weight": "model-00049-of-000051.safetensors", + 
"model.layers.48.mlp.experts.77.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.77.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.77.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.78.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.78.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.78.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.79.gate_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.79.up_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.48.mlp.experts.79.down_proj.weight": "model-00049-of-000051.safetensors", + "model.layers.49.input_layernorm.weight": "model-00050-of-000051.safetensors", + "model.layers.49.pre_mlp_layernorm.weight": "model-00050-of-000051.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00050-of-000051.safetensors", + "model.layers.49.post_mlp_layernorm.weight": "model-00050-of-000051.safetensors", + "model.layers.49.self_attn.qkv_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.self_attn.k_layernorm.weight": "model-00050-of-000051.safetensors", + "model.layers.49.self_attn.param_sink_key": "model-00050-of-000051.safetensors", + "model.layers.49.self_attn.param_sink_value": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.gate.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.e_score_correction_bias": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.weight": 
"model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.0.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.0.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.0.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.1.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.1.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.1.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.2.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.2.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.2.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.3.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.3.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.3.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.4.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.4.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.4.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.5.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.5.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.5.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.6.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.6.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.6.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.7.gate_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.7.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.7.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.8.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.8.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.8.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.9.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.9.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.9.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.10.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.10.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.10.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.11.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.11.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.11.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.12.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.12.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.12.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.13.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.13.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.13.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.14.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.14.up_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.14.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.15.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.15.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.15.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.16.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.16.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.16.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.17.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.17.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.17.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.18.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.18.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.18.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.19.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.19.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.19.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.20.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.20.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.20.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.21.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.21.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.21.down_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.22.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.22.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.22.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.23.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.23.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.23.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.24.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.24.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.24.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.25.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.25.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.25.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.26.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.26.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.26.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.27.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.27.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.27.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.28.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.28.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.28.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.29.gate_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.29.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.29.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.30.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.30.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.30.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.31.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.31.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.31.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.32.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.32.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.32.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.33.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.33.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.33.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.34.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.34.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.34.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.35.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.35.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.35.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.36.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.36.up_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.36.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.37.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.37.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.37.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.38.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.38.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.38.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.39.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.39.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.39.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.40.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.40.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.40.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.41.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.41.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.41.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.42.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.42.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.42.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.43.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.43.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.43.down_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.44.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.44.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.44.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.45.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.45.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.45.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.46.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.46.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.46.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.47.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.47.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.47.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.48.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.48.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.48.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.49.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.49.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.49.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.50.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.50.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.50.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.51.gate_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.51.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.51.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.52.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.52.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.52.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.53.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.53.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.53.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.54.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.54.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.54.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.55.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.55.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.55.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.56.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.56.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.56.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.57.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.57.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.57.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.58.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.58.up_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.58.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.59.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.59.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.59.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.60.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.60.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.60.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.61.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.61.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.61.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.62.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.62.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.62.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.63.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.63.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.63.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.64.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.64.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.64.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.65.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.65.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.65.down_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.66.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.66.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.66.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.67.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.67.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.67.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.68.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.68.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.68.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.69.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.69.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.69.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.70.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.70.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.70.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.71.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.71.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.71.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.72.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.72.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.72.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.73.gate_proj.weight": "model-00050-of-000051.safetensors", + 
"model.layers.49.mlp.experts.73.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.73.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.74.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.74.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.74.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.75.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.75.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.75.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.76.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.76.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.76.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.77.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.77.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.77.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.78.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.78.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.78.down_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.79.gate_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.79.up_proj.weight": "model-00050-of-000051.safetensors", + "model.layers.49.mlp.experts.79.down_proj.weight": "model-00050-of-000051.safetensors", + "model.norm.weight": "model-00050-of-000051.safetensors", + "lm_head.weight": "model-00050-of-000051.safetensors", + "model.layers.50.embed_tokens.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.enorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.hnorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.eh_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.shared_head.norm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.shared_head.head.weight": "model-00051-of-000051.safetensors", + "model.layers.50.input_layernorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.pre_mlp_layernorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.post_mlp_layernorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.self_attn.qkv_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.self_attn.k_layernorm.weight": "model-00051-of-000051.safetensors", + "model.layers.50.self_attn.param_sink_key": "model-00051-of-000051.safetensors", + "model.layers.50.self_attn.param_sink_value": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.gate.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.e_score_correction_bias": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.0.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.0.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.0.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.1.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.1.up_proj.weight": 
"model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.1.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.2.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.2.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.2.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.3.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.3.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.3.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.4.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.4.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.4.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.5.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.5.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.5.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.6.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.6.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.6.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.7.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.7.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.7.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.8.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.8.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.8.down_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.9.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.9.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.9.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.10.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.10.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.10.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.11.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.11.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.11.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.12.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.12.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.12.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.13.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.13.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.13.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.14.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.14.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.14.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.15.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.15.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.15.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.16.gate_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.16.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.16.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.17.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.17.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.17.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.18.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.18.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.18.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.19.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.19.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.19.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.20.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.20.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.20.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.21.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.21.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.21.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.22.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.22.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.22.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.23.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.23.up_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.23.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.24.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.24.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.24.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.25.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.25.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.25.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.26.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.26.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.26.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.27.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.27.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.27.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.28.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.28.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.28.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.29.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.29.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.29.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.30.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.30.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.30.down_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.31.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.31.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.31.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.32.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.32.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.32.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.33.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.33.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.33.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.34.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.34.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.34.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.35.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.35.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.35.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.36.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.36.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.36.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.37.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.37.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.37.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.38.gate_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.38.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.38.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.39.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.39.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.39.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.40.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.40.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.40.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.41.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.41.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.41.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.42.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.42.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.42.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.43.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.43.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.43.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.44.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.44.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.44.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.45.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.45.up_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.45.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.46.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.46.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.46.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.47.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.47.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.47.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.48.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.48.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.48.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.49.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.49.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.49.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.50.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.50.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.50.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.51.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.51.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.51.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.52.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.52.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.52.down_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.53.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.53.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.53.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.54.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.54.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.54.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.55.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.55.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.55.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.56.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.56.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.56.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.57.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.57.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.57.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.58.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.58.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.58.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.59.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.59.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.59.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.60.gate_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.60.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.60.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.61.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.61.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.61.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.62.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.62.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.62.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.63.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.63.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.63.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.64.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.64.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.64.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.65.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.65.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.65.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.66.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.66.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.66.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.67.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.67.up_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.67.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.68.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.68.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.68.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.69.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.69.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.69.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.70.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.70.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.70.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.71.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.71.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.71.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.72.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.72.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.72.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.73.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.73.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.73.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.74.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.74.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.74.down_proj.weight": "model-00051-of-000051.safetensors", + 
"model.layers.50.mlp.experts.75.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.75.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.75.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.76.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.76.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.76.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.77.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.77.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.77.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.78.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.78.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.78.down_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.79.gate_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.79.up_proj.weight": "model-00051-of-000051.safetensors", + "model.layers.50.mlp.experts.79.down_proj.weight": "model-00051-of-000051.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
# coding=utf-8
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from shutil import copyfile
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as spm

from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import logging


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}

PRETRAINED_VOCAB_FILES_MAP = {}


def convert_bool(string):
    """
    Convert the strings "true" / "false" (case-insensitive) to the matching bool.

    Args:
        string: Any value. Only `str` values equal to "true" or "false"
            (ignoring case) are converted.

    Returns:
        `True` or `False` for a recognised string; otherwise the input unchanged.
    """
    if isinstance(string, str):
        lowered = string.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
    return string


class OpenPanguTokenizer(PreTrainedTokenizer):
    """
    Construct a tokenizer backed by a SentencePiece model file.

    Args:
        vocab_file (`str`):
            Path to the SentencePiece model file.
        unk_token, bos_token, eos_token, pad_token (`str`):
            Special tokens forwarded to `PreTrainedTokenizer.__init__`.
        sp_model_kwargs (`dict`, *optional*):
            Keyword arguments passed to the `SentencePieceProcessor` constructor.
        add_bos_token (`bool` or `str`, defaults to `True`):
            Whether `build_inputs_with_special_tokens` prepends the BOS id.
            The strings "true"/"false" are accepted.
        add_eos_token (`bool` or `str`, defaults to `False`):
            Whether `build_inputs_with_special_tokens` appends the EOS id.
            The strings "true"/"false" are accepted.
        decode_with_prefix_space (`bool`, defaults to `False`):
            Stored on the instance; not read anywhere else in this file.
        clean_up_tokenization_spaces (`bool`, defaults to `False`):
            Forwarded to `PreTrainedTokenizer.__init__`.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    model_input_names = ["input_ids", "attention_mask"]
    _auto_class = "AutoTokenizer"

    def __init__(
        self,
        vocab_file,
        # NOTE(review): these default token strings are empty in this copy of the
        # file -- confirm they match the tokens declared in tokenizer_config.json.
        unk_token="",
        bos_token="",
        eos_token="",
        pad_token="",
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        decode_with_prefix_space=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        # Load the SentencePiece model once, before the base-class constructor runs
        # (the original loaded it a second time afterwards with identical arguments).
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )
        self.vocab_file = vocab_file
        # Both flags may arrive as the strings "true"/"false"; normalise both the same way.
        self.add_bos_token = convert_bool(add_bos_token)
        self.add_eos_token = convert_bool(add_eos_token)
        self.decode_with_prefix_space = decode_with_prefix_space
        self._no_prefix_space_tokens = None

    @property
    def no_prefix_space_tokens(self):
        """
        Set of token strings in the vocabulary that do not start with "▁".

        Computed lazily on first access and cached. The set holds token strings
        (not indices) because `_maybe_add_prefix_space` looks tokens up by string.
        """
        if self._no_prefix_space_tokens is None:
            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
            self._no_prefix_space_tokens = {tok for tok in vocab if not tok.startswith("▁")}
        return self._no_prefix_space_tokens

    @property
    def vocab_size(self):
        """Returns the number of pieces in the SentencePiece model."""
        return self.sp_model.get_piece_size()

    @property
    def bos_token_id(self) -> Optional[int]:
        """Returns the BOS id reported by the SentencePiece model."""
        return self.sp_model.bos_id()

    @property
    def eos_token_id(self) -> Optional[int]:
        """Returns the EOS id as resolved by the base class."""
        return super().eos_token_id

    def get_vocab(self):
        """Returns the vocabulary as a `{token: id}` dict, including added tokens."""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Splits `text` into SentencePiece pieces (strings)."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) to an id using the SentencePiece model."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) to a token (str) using the SentencePiece model."""
        return self.sp_model.IdToPiece(index)

    def _maybe_add_prefix_space(self, tokens, decoded):
        """
        Prepend a space to `decoded` unless the first token is in `no_prefix_space_tokens`.

        Kept for backward compatibility; `convert_tokens_to_string` does not call it.
        """
        if tokens and tokens[0] not in self.no_prefix_space_tokens:
            return " " + decoded
        return decoded

    def convert_tokens_to_string(self, tokens):
        """
        Converts a sequence of tokens (strings) into a single string.

        Special tokens are emitted verbatim; runs of ordinary tokens between them
        are decoded with the SentencePiece model.

        The original implementation prepended one space and then dropped the first
        character of the result. Because its lookup set held integer indices while
        the lookup key was a token string, the space was added for every non-empty
        input, so the two steps always cancelled out. They are omitted here, which
        yields the same string without building a vocabulary-sized set.
        """
        # Evaluate the special-token list once instead of once per token.
        special_tokens = set(self.all_special_tokens)
        pieces = []
        pending = []
        for token in tokens:
            if token in special_tokens:
                # Flush ordinary tokens first so the special token is not passed
                # through the SentencePiece decoder.
                if pending:
                    pieces.append(self.sp_model.decode(pending))
                    pending = []
                pieces.append(token)
            else:
                pending.append(token)
        if pending:
            pieces.append(self.sp_model.decode(pending))
        out_string = "".join(pieces)
        if self.clean_up_tokenization_spaces:
            out_string = self.clean_up_tokenization(out_string)
        return out_string

    # Override decode so that spaces_between_special_tokens defaults to False.
    def decode(self,
               token_ids,
               spaces_between_special_tokens: bool = False,
               **kwargs):
        return super().decode(
            token_ids=token_ids,
            spaces_between_special_tokens=spaces_between_special_tokens,
            **kwargs,
        )

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the SentencePiece model file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                Prefix joined to the file name with "-".

        Returns:
            `Tuple(str)`: Paths to the files saved, or `("",)` when `save_directory`
            is not a directory.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return ("",)
        # VOCAB_FILES_NAMES stores "./tokenizer.model"; take the bare file name so a
        # prefix produces "<prefix>-tokenizer.model" rather than a path into "<prefix>-./".
        vocab_name = os.path.basename(VOCAB_FILES_NAMES["vocab_file"])
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + vocab_name
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # The source file is gone; write the in-memory model instead.
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs as `[BOS] + token_ids_0 + token_ids_1 + [EOS]`.

        BOS is included only when `add_bos_token` is truthy and EOS only when
        `add_eos_token` is truthy; `token_ids_1` is optional.
        """
        if self.add_bos_token:
            bos_token_ids = [self.bos_token_id]
        else:
            bos_token_ids = []

        output = bos_token_ids + token_ids_0

        if token_ids_1 is not None:
            output = output + token_ids_1

        if self.add_eos_token:
            output = output + [self.eos_token_id]

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve the special-token mask for a token list that has no special tokens added.

        The mask mirrors the layout produced by `build_inputs_with_special_tokens`,
        so both have the same length.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_mask = [1] if self.add_bos_token else []
        eos_mask = [1] if self.add_eos_token else []
        sequence_len = len(token_ids_0) + (len(token_ids_1) if token_ids_1 is not None else 0)
        return bos_mask + ([0] * sequence_len) + eos_mask

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create token type ids for the given sequence(s). Every position gets type 0.

        The length equals that of `build_inputs_with_special_tokens` for the same
        arguments.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        total_len = len(token_ids_0)
        if token_ids_1 is not None:
            total_len += len(token_ids_1)
        if self.add_bos_token:
            total_len += 1
        if self.add_eos_token:
            total_len += 1
        return [0] * total_len
true}, "2": {"content": "", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45806": {"content": "<|User|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45813": {"content": "<|Bot|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45830": {"content": "[unused0]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45840": {"content": "[unused1]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45846": {"content": "[unused2]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45849": {"content": "[unused3]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45861": {"content": "[unused4]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45866": {"content": "[unused5]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45874": {"content": "[unused6]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45883": {"content": "[unused7]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45884": {"content": "[unused8]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45887": {"content": "[unused9]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45892": {"content": "[unused10]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45920": {"content": "[unused11]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45932": {"content": "[unused12]", "lstrip": false, "normalized": 
false, "rstrip": false, "single_word": false, "special": true}, "45938": {"content": "[unused13]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45953": {"content": "[unused14]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45968": {"content": "[unused15]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45974": {"content": "[unused16]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45982": {"content": "[unused17]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45986": {"content": "[unused18]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46005": {"content": "[unused19]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46007": {"content": "[unused20]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46014": {"content": "[unused21]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46017": {"content": "[unused22]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46028": {"content": "[unused23]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46032": {"content": "[unused24]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46081": {"content": "[unused25]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46086": {"content": "[unused26]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46101": {"content": "[unused27]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, 
"special": true}, "46183": {"content": "[unused28]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46230": {"content": "[unused29]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46245": {"content": "[unused30]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46257": {"content": "[unused31]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144208": {"content": "[unused32]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144209": {"content": "[unused33]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}}, "auto_map": {"AutoTokenizer": ["tokenization_openpangu.OpenPanguTokenizer", null]}, "bos_token": "", "clean_up_tokenization_spaces": false, "eos_token": "[unused10]", "legacy": true, "model_max_length": 1000000000000000019884624838656, "pad_token": null, "sp_model_kwargs": {}, "spaces_between_special_tokens": false, "tokenizer_class": "OpenPanguTokenizer", "unk_token": "", "use_default_system_prompt": false, "chat_template": "{%- set ns = namespace(is_first_tool=true) %}\n{%- if not mcp_prompt is defined %}\n {%- set mcp_prompt = true %}\n{%- endif %}\n{%- if not background is defined %}\n {%- set background = none %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not think is defined %}\n {%- set think = true %}\n{%- endif %}\n{%- if not reasoning_effort is defined %}\n {%- set reasoning_effort = \"high\" %}\n{%- endif %}\n\n{{- '[unused9]系统:' -}}\n{#- 提取系统消息 #}\n{%- set system_message = \"\" %}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- endif %}\n{#- 如果传入工具将使用mcp人设,可以使用mcp_prompt字段禁用 #}\n{%- if mcp_prompt and tools %}\n {%- if 
system_message %}\n {%- set system_message = system_message + \"\n\" %}\n {%- endif %}\n {%- set system_message = system_message + \"你是一个能够调用外部工具解决问题的专家,你的目标是高效、准确、清晰地完成任务。\n你需要根据用户的问题,决定是否需要使用工具来完成任务。如果需要,请以明确的格式调用工具;如果不需要,请直接回答。\n你可以根据上下文决定是否继续调用工具或基于已有结果直接回答用户。如果工具调用已足够,请合理组织语言向用户汇报结论。在没有获得显式的调用结果之前,在调用工具的当轮回复之内严禁虚构或者假设一个工具调用结果来完成任务或者回答问题。也不应在没有返回工具调用信息的情况下,在调用工具的当轮假设或者明确声称工具执行成功。\" %}\n{%- endif %}\n{#- 思维链分档 #}\n{%- if reasoning_effort == \"low\" and think %}\n {%- if system_message or tools or background %}\n {%- set system_message = \"\n\neffort: compact\n\n\n\n\" + system_message %}\n {%- else %}\n {%- set system_message = \"\n\neffort: compact\n\n\" %}\n {%- endif %}\n{%- endif %}\n{{- system_message -}}\n\n{#- 工具使用描述和规范调用格式 #}\n{%- if tools %}\n {{- '\n你将在标签对内获得每个工具的描述:\n\n' }}\n {{- tools | tojson(ensure_ascii=False, sort_keys=False) }}\n {{- '\n\n' }}\n {{- \"对于每个函数调用,返回一个 JSON 对象,放在 [unused11][unused12] 标签对中,多个调用组成一个列表,其中每个函数包含函数名和对应函数的参数,格式如下:\n\" }}\n {{- '[unused11]\n[{\"name\": \"<函数名1>\", \"arguments\": }, {\"name\": \"<函数名2>\", \"arguments\": }, ...]\n[unused12]' }}\n {{- '\n<工具使用原则>\n1. 只有在所有必填参数(required字段中列出的)都具备有效值时,才能调用该函数\n2. 如果缺少任何必填参数,必须向用户询问缺失的参数,而不是直接调用函数\n3. 
可选参数如果没有提供可以忽略或使用默认值\n' }}\n{%- endif %}\n\n{#- 背景信息字段 #}\n{%- if background is not none and background -%}\n {{- '\n<背景信息>' -}}\n {{- background -}}\n {{- '' -}}\n{%- endif %}\n\n{%- if messages | length == 0 and not think %}\n {{- \" /no_think\" -}}\n{%- endif %}\n{{- '[unused10]' -}}\n\n{%- if messages | length != 0 %}\n {%- for message in messages[:-1] %}\n {%- if message['role'] == 'user' %}\n {{- '[unused9]用户:' + message['content'] -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n\n {%- if message['role'] == 'assistant' %}\n {{- '[unused9]助手:[unused16][unused17]' -}}\n {{- message['content'] -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- '[unused10]' }}\n {%- endif %}\n\n {%- if message['role'] == 'tool' 
%}\n {{- '[unused9]' -}}\n {{- '工具:' + message['content'] + \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- endfor %}\n\n {#- 处理最后一个角色,判断快慢思考 #}\n {%- if messages[-1]['role'] == \"user\" %}\n {{- '[unused9]' -}}\n {{- '用户:' + messages[-1]['content'] -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"tool\" %}\n {{- '[unused9]' -}}\n {{- '工具:' + messages[-1]['content'] -}}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"assistant\" %}\n {{- '[unused9]' -}}\n {{- '助手:[unused16][unused17]' + messages[-1]['content'] -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- 
']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n{%- endif %}\n\n{{-'[unused9]助手:' }}\n"} \ No newline at end of file