diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e3603fe277c8f35347e530ec68b724b55ac35444 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,55 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+model-00016-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00051-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00045-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00013-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00010-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00015-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00041-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00047-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00021-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+tokenizer.model filter=lfs diff=lfs merge=lfs -text
+model-00046-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00032-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00042-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00025-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00024-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00004-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00037-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00049-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00023-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00033-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00040-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00027-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00009-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00031-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00036-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00018-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00019-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00034-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00022-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00026-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00017-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00020-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00001-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00039-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00038-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00029-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00012-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00035-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00002-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00006-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00044-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00011-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00003-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00005-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00014-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00007-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00043-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00048-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00008-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00030-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00050-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00028-of-000051.safetensors filter=lfs diff=lfs merge=lfs -text
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..386bd0a09ea862ac149244862dd5464abe7360fb
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0
+
+This OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 (the "Agreement") is a legal agreement between You and Huawei Technologies Co., Ltd. ("Huawei", "We" or "Us"), and it governs Your reproducing, use, modification, and distribution of openPangu as made available by Huawei under this Agreement.
+
+By using, reproducing, modifying, distributing, performing or displaying any portion or element of openPangu, or otherwise accepting the terms of this Agreement, You agree to be bound by this Agreement.
+
+1. Definitions.
+1.1. “openPangu” or “Model” means openPangu large language models and software, including trained model weights, parameters (including optimizer states), accompanying source code and scripts released under this Agreement.
+1.2. “Derivative Model” means all (1) modifications to the Model, (2) works based on the Model, and (3) any other derivative works of the Model. For clarity, information or content resulting from operating or otherwise using the Model is not a Derivative Model.
+1.3. “You” or “Your” means an individual or Legal Entity exercising permissions granted by this Agreement and/or using the Model for any purpose.
+1.4. “Third Party” or “Third Parties” means individuals or legal entities that are not under common control with Us or You.
+
+2. License Grant. Subject to Your full compliance with the terms and conditions of this Agreement, We hereby grant to You a perpetual, worldwide, non-exclusive, non-transferable, no-charge, royalty-free license (except as stated in Section 3) to use, reproduce, modify, and distribute the Model.
+
+3. Conditions for License Grant. You represent and warrant that You will not access, download, install, run, deploy, integrate, modify, or otherwise use the Model, directly or indirectly, within the European Union.
+
+
+4. Redistribution.
+4.1. If You distribute the Model or Derivative Model, You shall retain in Your distribution (1) a copy of this Agreement, and (2) all copyright notices and other notices of origin included in the Model that are applicable to Your distribution.
+4.2. Further, if You distribute or make available to Third Parties a product or service (including another AI model) based on the Model, You are required to (1) display the acknowledgement “Powered by openPangu” and (2) include a trademark notice “openPangu is a trademark of Huawei Technologies Co., Ltd.” on related webpages, user manuals, product documentation or other advertising materials mentioning features of the Model.
+4.3. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for Derivative Model made by You as a whole, provided Your use, reproduction, and distribution of the Model otherwise complies with the terms and conditions of this Agreement.
+
+5. Ownership. We do not claim ownership to any information or content generated using the Model or Derivative Model that are made by You. You are solely responsible for evaluating the accuracy and appropriateness of such information or content for Your use case.
+
+6. Trademarks. This Agreement does not grant permission to use the trade names, trademarks, service marks, or product names of Huawei, except as required for complying with Section 4.2.
+
+7. Indemnity. You will indemnify and hold harmless Huawei from and against any claim by any third party arising out of or related to Your use or distribution of the Model or Derivative Model made by You (e.g. a violation against Section 3). For avoidance of doubt, “third party” in this clause includes supervisory authorities.
+
+8. THE MODEL IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NONINFRINGEMENT, ACCURACY, OR THE ABSENCE OF LATENT OR OTHER DEFECTS OR ERRORS, WHETHER OR NOT DISCOVERABLE, ALL TO THE GREATEST EXTENT PERMISSIBLE UNDER APPLICABLE LAW.
+
+9. IN NO EVENT SHALL WE BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE MODEL, IN WHOLE OR IN PART, NO MATTER HOW IT’S CAUSED OR THE LEGAL THEORY IT IS BASED ON, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+
+END OF THE TERMS AND CONDITIONS
diff --git a/OPEN SOURCE SOFTWARE NOTICE b/OPEN SOURCE SOFTWARE NOTICE
new file mode 100644
index 0000000000000000000000000000000000000000..87473f23483a276c53ff5741064c320f0b79d04e
--- /dev/null
+++ b/OPEN SOURCE SOFTWARE NOTICE
@@ -0,0 +1,635 @@
+OPEN SOURCE SOFTWARE NOTICE
+
+Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. The open source licenses prevail over all other license information with regard to the respective open source software contained in the product, including but not limited to the End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co., Ltd. and any of its local subsidiaries which may have provided this product to you in your local country.
+
+Warranty Disclaimer
+THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS.
+
+Copyright Notice and License Texts
+
+Software: transformers 4.48.2
+Copyright notice:
+Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+
+License Text:
+----------------------------------------
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+Software: vllm 0.9.1
+Copyright notice:
+Copyright 2025 The vLLM team.
+
+License Text:
+----------------------------------------
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+Software: vllm-ascend 0.9.1
+Copyright notice:
+Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+
+License Text:
+----------------------------------------
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
index 7891eec530a67a914f6cd982dfbeb710b7c8a658..3302700dc9d9123c71eaaea80a7bedac8f091ed0 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,85 @@
----
-license: other
-license_name: openpangu-model-license-agreement-version-1.0
-license_link: https://ai.gitcode.com/ascend-tribe/openPangu-R-72B-2512/blob/main/LICENSE
----
+# openPangu-R-72B-2512
+中文 | [English](README_EN.md)
+
+## 1. 简介
+openPangu-R-72B-2512 是基于昇腾集群训练的MoE模型。模型总参数量74B,激活参数量15B,采用80选8的专家选择机制,支持128k长序列处理。训练数据总量约24T tokens。同一个模型支持快慢思考两种模式切换,慢思考模式下,支持思维链分档(“低”和“高”两种推理深度)。
+
+## 2. 模型架构
+openPangu-R-72B-2512 在模型稳定收敛和效果提升方向进行了以下优化:
+- 在注意力机制中引入参数式Sink Token技术: 有效缓解极大激活值问题,训练中最大激活值从$10^3$降至$10^2$量级, 提升训练稳定性并对后量化亲和。
+
+- K-Norm与Depth-Scaled Sandwich-Norm:为保证attention logits的稳定性,我们采用了K-Norm结构。K-Norm与QK-Norm类似,但只对attention的key施加RMS Norm。K-Norm可起到与QK-Norm类似的稳定性效果,但引入的计算开销更小,同时K-Norm不影响Query的scale,带来更灵活的表达能力。为了保证残差连接的稳定性,我们采用了Depth-Scaled Sandwich-Norm方法。
+
+- 注意力架构优化:增加Query头数和注意力头维度,使模型能够从更多角度捕获细粒度语义关系。引入Partial RoPE机制,仅对Query和Key中1/3维度应用位置编码。尽管Key头维度有所增加,但通过将KV组数量减半,KV cache仍可减少37.5%,在保持推理阶段显存和速度优化的同时,实现了更低的训练损失和更优的推理性能。
+
+- Adaptive Aux Free负载优化技术:能够自适应调整专家bias更新幅度,减少均衡震荡现象,优化专家负载分布均衡性。
+
+
+详细架构参数如下:
+
+| | |
+|:---:|:---:|
+| **Architecture** | Mixture-of-Experts (MoE) |
+| **Total Parameters** | 74B |
+| **Activated Parameters** | 15B |
+| **Number of Layers** (Dense layer included) | 50 |
+| **Number of Dense Layers** | 4 |
+| **Number of MTP Modules** | 1 |
+| **Hidden Dimension** | 4608 |
+| **MoE Hidden Dimension** (per Expert) | 1280 |
+| **Attention Mechanism** | GQA |
+| **Number of Attention Heads** | 64 |
+| **Number of Query Groups** | 4 |
+| **Number of Experts** | 80 |
+| **Selected Experts per Token** | 8 |
+| **Number of Shared Experts** | 2 |
+| **Vocabulary Size** | 153K |
+| **Context Length** | 128K |
+
+
+## 3. 测评结果
+
+| 测评集 | 测评指标 | openPangu-R-72B-2512 快思考 | openPangu-R-72B-2512 慢思考 |
+|:------------------:|:----------------------------:|:-----:|:-----:|
+| **通用能力** | | |
+| LiveBench | Acc (2024-11-25) | 67.3 | 75.2 |
+| MMLU-Pro | Exact Match | 84.2 | 84.8 |
+| MMLU-ProX | Acc | 76.9 | 80.6 |
+| RULER | Acc | 95.6 | 94.7 |
+| LongBench V2 | Acc |45.3 |55.3 |
+| IF-Eval | Prompt Strict | 86.3 | 79.1 |
+| Hallucination-LeaderBoard | 1-HHEM | 96.5 | 97.1 |
+| GPQA-Diamond | Avg@4 | 76.8 | 83.2 |
+| SuperGPQA | Acc | 58.9 | 64.2 |
+| **数学能力** | | |
+| AIME24 | Avg@16 | 75.6 | 89.0 |
+| AIME25 | Avg@16 | 60.6 | 81.3 |
+| CNMO 2024 | Avg@32 | 77.8 | 82.8 |
+| HMMT 2025 | Avg@16 (February) | 45.4 | 74.8 |
+| **代码能力** | | |
+| LiveCodeBench V6 | Avg@3 (01/25~05/25) | 41.9 | 69.5 |
+| Codeforces | Elo Avg@3 (02/25~09/25) | 1044.5 | 1701.4 |
+| **Agent工具调用** | | |
+| BFCL-V3 | Acc (Prompt) | 74.6 | 76.5 |
+| Tau-Bench (airline) | Avg@3 (FC) | 45.3 | 56.0 |
+| Tau-Bench (retail) | Avg@3 (FC) | 70.1 | 73.0 |
+| Tau2-Bench (airline) | Avg@3 (FC) | 58.0 | 65.3 |
+| Tau2-Bench (retail) | Avg@3 (FC) | 71.4 | 78.7 |
+| Tau2-Bench (telecom) | Avg@3 (FC) | 48.8 | 49.4 |
+| AceBench | Acc (Prompt) | 74.3 | 79.6 |
+
+
+## 4. 部署和使用
+- 使用omni-infer推理框架,参考[[omniinfer_for_openpangu_r_72b_2512](doc/omniinfer_for_openpangu_r_72b_2512.md)]
+
+## 5. 模型许可证
+除文件中对开源许可证另有约定外,openPangu-R-72B-2512 模型根据 OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 授权,旨在允许使用并促进人工智能技术的进一步发展。有关详细信息,请参阅模型存储库根目录中的 [LICENSE](LICENSE) 文件。
+
+## 6. 免责声明
+由于 openPangu-R-72B-2512 (“模型”)所依赖的技术固有的限制,以及人工智能生成的内容是由盘古自动生成的,华为无法对以下事项做出任何保证:
+- 该模型的输出通过AI算法自动生成,不能排除某些信息可能存在缺陷、不合理或引起不适的可能性,生成的内容不代表华为的态度或立场;
+- 无法保证该模型100%准确、可靠、功能齐全、及时、安全、无错误、不间断、持续稳定或无任何故障;
+- 该模型的输出内容不构成任何建议或决策,也不保证生成的内容的真实性、完整性、准确性、及时性、合法性、功能性或实用性。生成的内容不能替代医疗、法律等领域的专业人士回答您的问题。生成的内容仅供参考,不代表华为的任何态度、立场或观点。您需要根据实际情况做出独立判断,华为不承担任何责任。
+
+## 7. 反馈
+如果有任何意见和建议,请提交issue或联系[openPangu@huawei.com](mailto:openPangu@huawei.com)。
diff --git a/README_EN.md b/README_EN.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ee1db335104a8618875990eb6ab6ac20838aea2
--- /dev/null
+++ b/README_EN.md
@@ -0,0 +1,85 @@
+# openPangu-R-72B-2512
+[中文](README.md) | English
+
+## 1. Introduction
+openPangu-R-72B-2512 is an MoE model trained on Ascend. The model has 74B total parameters and 15B activated parameters. It selects the top 8 experts out of 80 routed experts. Its context length is 128k. The total pretraining data contains approximately 24T tokens. It supports switching between two modes (fast-thinking and slow-thinking). In slow-thinking mode, it supports two levels of reasoning effort ('low' and 'high').
+
+## 2. Architecture
+openPangu-R-72B-2512 includes several enhancements:
+- Parametric sink token: Effectively mitigates the problem of extremely large activation values, reducing the maximum activation value from the order of $10^3$ to $10^2$ during training, which improves training stability and enhances compatibility with post-quantization.
+
+- K-Norm and Depth-Scaled Sandwich-Norm: To ensure the stability of attention logits, we apply K-Norm, a structure analogous to QK-Norm that applies RMS Norm solely to the attention keys. This approach achieves stability effects comparable to QK-Norm while introducing less computational overhead. Moreover, by preserving the original scale of the Query, K-Norm offers greater expressive flexibility. To maintain the stability of residual connections, we employ Depth-Scaled Sandwich-Norm.
+
+- Attention design: We increase Query heads and attention head dimensions to enable the model to capture fine-grained semantic relationships from multiple perspectives. The Partial RoPE mechanism applies positional encoding to only 1/3 of the dimensions in Query and Key. Although the Key head dimension increases, halving the number of KV groups still reduces KV cache by 37.5%, achieving lower training loss and improved inference performance while maintaining memory and speed optimizations during the inference stage.
+
+- Adaptive Aux-Free Load Balancing Strategy: This approach adaptively adjusts the update magnitude of expert bias, mitigates balancing oscillations, and optimizes the equilibrium of expert load distribution.
+
+Hyperparameters related to model architecture are as follows:
+
+| | |
+|:---:|:---:|
+| **Architecture** | Mixture-of-Experts (MoE) |
+| **Total Parameters** | 74B |
+| **Activated Parameters** | 15B |
+| **Number of Layers** (Dense layer included) | 50 |
+| **Number of Dense Layers** | 4 |
+| **Number of MTP Modules** | 1 |
+| **Hidden Dimension** | 4608 |
+| **MoE Hidden Dimension** (per Expert) | 1280 |
+| **Attention Mechanism** | GQA |
+| **Number of Attention Heads** | 64 |
+| **Number of Query Groups** | 4 |
+| **Number of Experts** | 80 |
+| **Selected Experts per Token** | 8 |
+| **Number of Shared Experts** | 2 |
+| **Vocabulary Size** | 153K |
+| **Context Length** | 128K |
+
+## 3. Results
+| Benchmark | Metric | openPangu-R-72B-2512 Fast-thinking | openPangu-R-72B-2512 Slow-thinking |
+|:------------------:|:----------------------------:|:-----:|:-----:|
+| **General** | | |
+| LiveBench | Acc (2024-11-25) | 67.3 | 75.2 |
+| MMLU-Pro | Exact Match | 84.2 | 84.8 |
+| MMLU-ProX | Acc | 76.9 | 80.6 |
+| RULER | Acc | 95.6 | 94.7 |
+| LongBench V2 | Acc |45.3 |55.3 |
+| IF-Eval | Prompt Strict | 86.3 | 79.1 |
+| Hallucination-LeaderBoard | 1-HHEM | 96.5 | 97.1 |
+| GPQA-Diamond | Avg@4 | 76.8 | 83.2 |
+| SuperGPQA | Acc | 58.9 | 64.2 |
+| **Math** | | |
+| AIME24 | Avg@16 | 75.6 | 89.0 |
+| AIME25 | Avg@16 | 60.6 | 81.3 |
+| CNMO 2024 | Avg@32 | 77.8 | 82.8 |
+| HMMT 2025 | Avg@16 (February) | 45.4 | 74.8 |
+| **Coding** | | |
+| LiveCodeBench V6 | Avg@3 (01/25~05/25) | 41.9 | 69.5 |
+| Codeforces | Elo Avg@3 (02/25~09/25) | 1044.5 | 1701.4 |
+| **Agentic Tool Use** | | |
+| BFCL-V3 | Acc (Prompt) | 74.6 | 76.5 |
+| Tau-Bench (airline) | Avg@3 (FC) | 45.3 | 56.0 |
+| Tau-Bench (retail) | Avg@3 (FC) | 70.1 | 73.0 |
+| Tau2-Bench (airline) | Avg@3 (FC) | 58.0 | 65.3 |
+| Tau2-Bench (retail) | Avg@3 (FC) | 71.4 | 78.7 |
+| Tau2-Bench (telecom) | Avg@3 (FC) | 48.8 | 49.4 |
+| AceBench | Acc (Prompt) | 74.3 | 79.6 |
+
+## 4. Deployment
+- omni-infer: please refer to [[omniinfer_for_openpangu_r_72b_2512](doc/omniinfer_for_openpangu_r_72b_2512_EN.md)]
+
+## 5. Model License
+Unless otherwise noted, the openPangu-R-72B-2512 model is licensed under the terms and conditions of OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0, which is intended to be used permissively and enable the further development of artificial intelligence technologies. Please refer to the [LICENSE](LICENSE) file located in the root directory of the model repository for details.
+
+## 6. Disclaimer
+Due to the technical limitations inherent in the technology on which the openPangu-R-72B-2512 model (“Model”) relies and the fact that AI-generated content is automatically produced by the Model, Huawei cannot make any guarantees regarding the following matters:
+
+- The output of this Model is automatically generated via AI algorithms; the possibility that some of the information may be flawed, unreasonable, or cause discomfort cannot be ruled out, and the generated content does not represent Huawei's attitude or standpoint;
+- There is no guarantee that this Model is 100% accurate, reliable, functional, timely, secure, safe, error-free, uninterrupted, continuously stable, or free of any faults;
+- The output of this Model does not constitute any advice or decision for you, and it does not guarantee the authenticity, completeness, accuracy, timeliness, legality, functionality, or practicality of the generated content. The generated content cannot replace professionals in medical, legal, and other fields in answering your questions. The generated content is for your reference only and does not represent any attitude, standpoint, or position of Huawei. You need to make independent judgments based on your actual situation, and Huawei does not assume any responsibilities.
+
+## 7. Contact
+If you have any questions, please raise an issue or contact us at [openPangu@huawei.com](mailto:openPangu@huawei.com).
+
+
+
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..80966a8db7cafdfe3a17b1408f4076207ba9e9a1
--- /dev/null
+++ b/config.json
@@ -0,0 +1,45 @@
+{
+ "architectures": [
+ "PanguProMoEV2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "auto_map": {
+ "AutoConfig": "configuration_pangu_moe.PanguProMoEConfig",
+ "AutoModel": "modeling_pangu_moe.PanguProMoEModel",
+ "AutoModelForCausalLM": "modeling_pangu_moe.PanguProMoEForCausalLM"
+ },
+ "bos_token_id": 1,
+ "eos_token_id": 45892,
+ "first_k_dense_replace": 4,
+ "hidden_act": "silu",
+ "hidden_size": 4608,
+ "initializer_range": 0.02,
+ "intermediate_size": 10240,
+ "max_position_embeddings": 4096,
+ "model_type": "PanguProMoE",
+ "moe_intermediate_size": 1280,
+ "n_routed_experts": 80,
+ "n_shared_experts": 2,
+ "norm_topk_prob": true,
+ "num_attention_heads": 64,
+ "num_experts_per_tok": 8,
+ "num_hidden_layers": 50,
+ "num_key_value_heads": 4,
+ "num_nextn_predict_layers": 1,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000,
+ "routed_scaling_factor": 2.5,
+ "router_enable_expert_bias": true,
+ "sandwich_norm": true,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.48.2",
+ "use_cache": true,
+ "vocab_size": 153600,
+ "qk_nope_dim": 128,
+ "qk_rope_dim": 64,
+ "v_channels": 128,
+ "param_sink_number": 128,
+ "param_sink_with_value": true
+}
diff --git a/configuration_pangu_moe.py b/configuration_pangu_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1c2be8fd9edfa15067d7a4c83e0cac4d183b31b
--- /dev/null
+++ b/configuration_pangu_moe.py
@@ -0,0 +1,96 @@
+# coding=utf-8
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PanguProMoE model configuration"""
+
+
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+
+class PanguProMoEConfig(PretrainedConfig):
+
+ model_type = "PanguProMoE"
+ _auto_class = "AutoConfig"
+
+ def __init__(
+ self,
+ vocab_size=153376,
+ hidden_size=4608,
+ intermediate_size=10240,
+ num_hidden_layers=50,
+ num_attention_heads=64,
+ num_key_value_heads=4,
+ mlp_only_layers=[0,1,2,3],
+ hidden_act="silu",
+ max_position_embeddings=8192,
+ initializer_range=0.02,
+ rms_norm_eps=1e-5,
+ use_cache=True,
+ tie_word_embeddings=False,
+ rope_theta=100000,
+ moe_intermediate_size=1280,
+ shared_expert_intermediate_size=2560,
+ num_experts_per_tok=8,
+ num_experts=80,
+ norm_topk_prob=True,
+ router_enable_expert_bias=True,
+ output_router_logits=False,
+ routed_scaling_factor=2.5,
+ qk_nope_dim = 128,
+ qk_rope_dim = 64,
+ v_channels = 128,
+ sandwich_norm=True,
+ param_sink_number = 128,
+ param_sink_with_value=True,
+ **kwargs,
+ ):
+ self.vocab_size = vocab_size
+ self.max_position_embeddings = max_position_embeddings
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.num_key_value_heads = num_key_value_heads
+ self.hidden_act = hidden_act
+ self.initializer_range = initializer_range
+ self.rms_norm_eps = rms_norm_eps
+ self.use_cache = use_cache
+ self.rope_theta = rope_theta
+ self.mlp_only_layers = mlp_only_layers
+ self.intermediate_size = intermediate_size
+
+ # MoE arguments
+ self.moe_intermediate_size = moe_intermediate_size
+ self.shared_expert_intermediate_size = shared_expert_intermediate_size
+ self.num_experts_per_tok = num_experts_per_tok
+ self.num_experts = num_experts
+ self.norm_topk_prob = norm_topk_prob
+ self.output_router_logits = output_router_logits
+ self.router_enable_expert_bias = router_enable_expert_bias
+ self.routed_scaling_factor = routed_scaling_factor
+ self.qk_nope_dim = qk_nope_dim
+ self.qk_rope_dim = qk_rope_dim
+ self.v_channels = v_channels
+ self.sandwich_norm = sandwich_norm
+ self.param_sink_number = param_sink_number
+ self.param_sink_with_value = param_sink_with_value
+
+ super().__init__(
+ tie_word_embeddings=tie_word_embeddings,
+ **kwargs,
+ )
diff --git a/doc/omniinfer_for_openpangu_r_72b_2512.md b/doc/omniinfer_for_openpangu_r_72b_2512.md
new file mode 100644
index 0000000000000000000000000000000000000000..b8383b02a12395e12efef17a5da8844454f89260
--- /dev/null
+++ b/doc/omniinfer_for_openpangu_r_72b_2512.md
@@ -0,0 +1,118 @@
+# openPangu-R-72B-2512在Omni-Infer部署指导文档
+
+## 硬件环境和部署方式
+PD混部,只需要1台Atlas 800T A3机器中的4个die。
+
+## 代码和镜像
+- Omni-Infer代码版本:release_v0.7.0
+- 配套镜像:参考 https://gitee.com/omniai/omniinfer/releases 中v0.7.0镜像,以A3硬件和arm架构为例,使用“docker pull swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm”。
+
+## 部署
+### 1. 启动镜像
+```bash
+IMAGE=swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm
+NAME=omniinfer-v0.7.0 # Custom docker name
+NPU_NUM=16 # A3节点die数
+DEVICE_ARGS=$(for i in $(seq 0 $((NPU_NUM-1))); do echo -n "--device /dev/davinci${i} "; done)
+
+# Run the container using the defined variables
+# Note if you are running bridge network with docker, Please expose available ports for multiple nodes communication in advance
+# To prevent device interference from other docker containers, add the argument "--privileged"
+docker run -itd \
+ --name=${NAME} \
+ --network host \
+ --privileged \
+ --ipc=host \
+ $DEVICE_ARGS \
+ --device=/dev/davinci_manager \
+ --device=/dev/devmm_svm \
+ --device=/dev/hisi_hdc \
+ -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+ -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
+ -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
+ -v /etc/ascend_install.info:/etc/ascend_install.info \
+ -v /mnt/:/mnt/ \
+ -v /data:/data \
+ -v /home/work:/home/work \
+ --entrypoint /bin/bash \
+ swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm
+```
+需要保证模型权重和本项目代码可在容器中访问。进入容器:
+```bash
+docker exec -it $NAME /bin/bash
+```
+
+### 2. 将examples/start_serving_openpangu_r_72b_2512.sh脚本放入omniinfer/tools/scripts路径并执行
+
+```bash
+git clone -b release_v0.7.0 https://gitee.com/omniai/omniinfer.git
+cd omniinfer/tools/scripts
+# 需修改serving脚本中model-path模型路径、master-ip机器IP地址和PYTHONPATH。
+bash start_serving_openpangu_r_72b_2512.sh
+```
+
+### 3. 发请求测试
+
+服务启动后,可发送测试请求。
+
+```bash
+curl http://0.0.0.0:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openpangu_r_72b_2512",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Who are you?"
+ }
+ ],
+ "temperature": 1.0,
+ "top_p": 0.8,
+ "top_k": -1,
+ "vllm_xargs": {"top_n_sigma": 0.05},
+ "chat_template_kwargs": {"think": true, "reasoning_effort": "low"}
+ }'
+ ```
+ ```bash
+ # 工具使用
+curl http://0.0.0.0:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openpangu_r_72b_2512",
+ "messages": [
+ {"role": "system", "content": "你是华为公司开发的盘古模型。\n现在是2025年7月30日"},
+ {"role": "user", "content": "深圳明天的天气如何?"}
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "获取指定城市的当前天气信息,包括温度、湿度、风速等数据。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "城市名称,例如:北京、深圳。支持中文或拼音输入。"
+ },
+ "date": {
+ "type": "string",
+ "description": "查询日期,格式为 YYYY-MM-DD(遵循 ISO 8601 标准)。例如:2023-10-01。"
+ }
+ },
+ "required": ["location", "date"],
+ "additionalProperties": "false"
+ }
+ }
+ }
+ ],
+ "temperature": 1.0,
+ "top_p": 0.8,
+ "top_k": -1,
+ "vllm_xargs": {"top_n_sigma": 0.05},
+ "chat_template_kwargs": {"think": true, "reasoning_effort": "high"}
+ }'
+```
+模型默认是慢思考模式,在慢思考模式下,模型支持思维链分档,可通过请求体字段"chat_template_kwargs": {"think": true, "reasoning_effort": "high"}中"reasoning_effort": "high"和"low"平衡模型精度和效率。
+模型的慢思考模式,可通过请求体字段"chat_template_kwargs": {"think": true/false} 开启和关闭。
diff --git a/doc/omniinfer_for_openpangu_r_72b_2512_EN.md b/doc/omniinfer_for_openpangu_r_72b_2512_EN.md
new file mode 100644
index 0000000000000000000000000000000000000000..5473e53fbb1afc607a5ef1e2f57904ea2a3c69ab
--- /dev/null
+++ b/doc/omniinfer_for_openpangu_r_72b_2512_EN.md
@@ -0,0 +1,119 @@
+# Deployment Guide for openPangu-R-72B-2512 on Omni-Infer
+
+## Hardware Environment and Deployment Method
+PD hybrid deployment, requiring only 4 dies of one Atlas 800T A3 machine.
+
+## Codes and Image
+- Omni-Infer code version: release_v0.7.0
+- Docker Image: Refer to the v0.7.0 image in https://gitee.com/omniai/omniinfer/releases. For example, for A3 hardware and ARM architecture, use "docker pull swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm".
+
+## Deployment
+### 1. Launch the image
+```bash
+IMAGE=swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm
+NAME=omniinfer-v0.7.0 # Custom docker name
+NPU_NUM=16 # 16 dies of A3 node
+DEVICE_ARGS=$(for i in $(seq 0 $((NPU_NUM-1))); do echo -n "--device /dev/davinci${i} "; done)
+
+# Run the container using the defined variables
+# Note if you are running bridge network with docker, Please expose available ports for multiple nodes communication in advance
+# To prevent device interference from other docker containers, add the argument "--privileged"
+docker run -itd \
+ --name=${NAME} \
+ --network host \
+ --privileged \
+ --ipc=host \
+ $DEVICE_ARGS \
+ --device=/dev/davinci_manager \
+ --device=/dev/devmm_svm \
+ --device=/dev/hisi_hdc \
+ -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+ -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
+ -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
+ -v /etc/ascend_install.info:/etc/ascend_install.info \
+ -v /mnt/:/mnt/ \
+ -v /data:/data \
+ -v /home/work:/home/work \
+ --entrypoint /bin/bash \
+ swr.cn-east-4.myhuaweicloud.com/omni/omniinfer-a3-arm:release_v0.7.0-vllm
+```
+Ensure that the model checkpoint and the project code are accessible within the container. Enter the container:
+```bash
+docker exec -it $NAME /bin/bash
+```
+
+### 2. Put examples/start_serving_openpangu_r_72b_2512.sh in the omniinfer/tools/scripts path and start the serving script
+
+```bash
+git clone -b release_v0.7.0 https://gitee.com/omniai/omniinfer.git
+cd omniinfer/tools/scripts
+# You need to modify the model-path, master-ip address and PYTHONPATH in the serving script.
+bash start_serving_openpangu_r_72b_2512.sh
+```
+
+### 3. Send Testing Requests
+
+After the service is started, we can send testing requests.
+
+```bash
+curl http://0.0.0.0:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openpangu_r_72b_2512",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Who are you?"
+ }
+ ],
+ "temperature": 1.0,
+ "top_p": 0.8,
+ "top_k": -1,
+ "vllm_xargs": {"top_n_sigma": 0.05},
+ "chat_template_kwargs": {"think": true, "reasoning_effort": "low"}
+ }'
+ ```
+ ```bash
+# Tool use
+curl http://0.0.0.0:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openpangu_r_72b_2512",
+ "messages": [
+ {"role": "system", "content": "你是华为公司开发的盘古模型。\n现在是2025年7月30日"},
+ {"role": "user", "content": "深圳明天的天气如何?"}
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "获取指定城市的当前天气信息,包括温度、湿度、风速等数据。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "城市名称,例如:北京、深圳。支持中文或拼音输入。"
+ },
+ "date": {
+ "type": "string",
+ "description": "查询日期,格式为 YYYY-MM-DD(遵循 ISO 8601 标准)。例如:2023-10-01。"
+ }
+ },
+ "required": ["location", "date"],
+ "additionalProperties": "false"
+ }
+ }
+ }
+ ],
+ "temperature": 1.0,
+ "top_p": 0.8,
+ "top_k": -1,
+ "vllm_xargs": {"top_n_sigma": 0.05},
+ "chat_template_kwargs": {"think": true, "reasoning_effort": "high"}
+ }'
+```
+
+The model is in slow-thinking mode by default. In slow-thinking mode, you can specify different reasoning effort by setting the "reasoning_effort" parameter in "chat_template_kwargs" to "high" or "low" to balance model accuracy and efficiency.
+openPangu-R-72B-2512 supports switching between slow-thinking and fast-thinking mode by setting {"think": true/false} in "chat_template_kwargs".
\ No newline at end of file
diff --git a/examples/start_serving_openpangu_r_72b_2512.sh b/examples/start_serving_openpangu_r_72b_2512.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d7a3d9ad867b4fcd2464cbb947b2290849f991e2
--- /dev/null
+++ b/examples/start_serving_openpangu_r_72b_2512.sh
@@ -0,0 +1,57 @@
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+# Example launcher: serves openpangu_r_72b_2512 via start_api_servers.py (--tp 4 on the 4 devices below, API port 8000). Replace the /path/to/... placeholders first.
+export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
+export VLLM_USE_V1=1
+export VLLM_WORKER_MULTIPROC_METHOD=fork
+export VLLM_ENABLE_MC2=0
+export USING_LCCL_COM=0
+
+export OMNI_USE_PANGU=1
+export ENABLE_PREFILL_TND=1
+
+export HCCL_OP_EXPANSION_MODE="AIV"
+export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
+
+export HCCL_RDMA_TIMEOUT=5
+export HCCL_DETERMINISTIC=False
+export ASCEND_GLOBAL_LOG_LEVEL=3
+export CPU_AFFINITY_CONF=2
+export VLLM_LOGGING_LEVEL=INFO
+
+export HCCL_BUFFSIZE=1000
+export HCCL_CONNECT_TIMEOUT=1800
+export HCCL_EXEC_TIMEOUT=1800
+export HCCL_INTRA_ROCE_ENABLE=1
+export HCCL_INTRA_PCIE_ENABLE=0
+export FORCE_ENABLE_CHUNK_PREFILL=1
+# Reasoning and tool-call parsers are on by default; set USE_REASONING=0 / USE_TOOL=0 in the environment to drop their flags.
+export USE_REASONING=${USE_REASONING:=1}
+export USE_TOOL=${USE_TOOL:=1}
+# Start empty so same-named variables inherited from the caller never leak in; expanded unquoted below so each flag word-splits into its own argument.
+reasoning=""
+tools=""
+[ "$USE_REASONING" = "1" ] && reasoning="--reasoning-parser pangu"
+[ "$USE_TOOL" = "1" ] && tools="--enable-auto-tool-choice --tool-call-parser pangu"
+
+# Prepend omniinfer; the ${VAR:+...} form avoids a trailing ':' (an empty path entry) when PYTHONPATH is unset.
+export PYTHONPATH=/path/to/omniinfer/${PYTHONPATH:+:$PYTHONPATH}
+rm -r -f .torchair_cache/
+
+python start_api_servers.py \
+ --num-servers 1 \
+ --model-path /path/to/model/ \
+ --master-ip 0.0.0.0 \
+ --tp 4 \
+ --num-dp 1 \
+ --master-port 3512 \
+ --served-model-name openpangu_r_72b_2512 \
+ --log-dir apiserverlog_pangu72B_hybrid_chunk \
+ --extra-args "--max-num-batched-tokens 2048 --enforce-eager --no-enable-prefix-caching --enable-expert-parallel --max-num-seqs 32 --long-prefill-token-threshold 1024" \
+ --base-api-port 8000 \
+ --gpu-util 0.90 \
+ --no-enable-prefix-caching \
+ --max-model-len 131072 \
+ $reasoning \
+ $tools \
+ --additional-config '{"graph_model_compile_config":{"level":1, "use_ge_graph_cached":true, "decode_gear_list": [32]}, "enable_hybrid_graph_mode": false, "expert_parallel_size": 4, "expert_tensor_parallel_size": 1}' &
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..91ad7e0656e52ba69e09a627fc553d1ed2e5d52f
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,11 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "eos_token_id": 45892,
+ "do_sample": true,
+ "temperature": 1.0,
+ "top_p": 0.8,
+ "top_n_sigma": 0.05,
+ "top_k": -1,
+ "transformers_version": "4.48.2"
+}
diff --git a/model-00001-of-000051.safetensors b/model-00001-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29ad59b8d4531754e80d14979ed7498baba4a46f
--- /dev/null
+++ b/model-00001-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:faea9c15e58366768bd204ef27f631f1b91e258b1fdfbebb1eea3f481600dd06
+size 1899599744
diff --git a/model-00002-of-000051.safetensors b/model-00002-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..00b42dc2d57a77dc1d5757a91754f073aa4d8538
--- /dev/null
+++ b/model-00002-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f92258b47acb3e71c7072e741c117617d49f5d4c4ffcce559d971b9638c478f
+size 484022000
diff --git a/model-00003-of-000051.safetensors b/model-00003-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e842b04d4ea09e947a99ae3f694f43ac9db8fd99
--- /dev/null
+++ b/model-00003-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96a77e9c991a6f78b03edada807dff37ba8d6083c7ff4d3afd77d83dad3cd77f
+size 484022000
diff --git a/model-00004-of-000051.safetensors b/model-00004-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3c5bc551dd8688ec8331ab3a08dee34b59e9ec1d
--- /dev/null
+++ b/model-00004-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0195156ab96017be8d878d9aec1c55c2ed522180fce949ddf30dc7f028b127b
+size 484022000
diff --git a/model-00005-of-000051.safetensors b/model-00005-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bec98c0bbd684d4b08f718ab72c0e90fa5b17e5e
--- /dev/null
+++ b/model-00005-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f1ae6f3c3bab2c5913505284600cddafba09a887a61496e1bb7c24b56f77b66
+size 3103607896
diff --git a/model-00006-of-000051.safetensors b/model-00006-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eecabf36b7af1a7596d4a1f302ef93b6ad0a1cf8
--- /dev/null
+++ b/model-00006-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb7d09f5bd129ec8eac37d63644c3fdd52b89c6de128712865078b3f6da3a5e1
+size 3103607896
diff --git a/model-00007-of-000051.safetensors b/model-00007-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..75c6db46c6a185df7e72b0e5f9514db2413fbae4
--- /dev/null
+++ b/model-00007-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a3346f29473783c660dd4c4af0e7d955cbef3c474e64a751a04bed38665e6dc
+size 3103607896
diff --git a/model-00008-of-000051.safetensors b/model-00008-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5cdb482524adff5cb616b3a21950c1c55ba54b99
--- /dev/null
+++ b/model-00008-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52fd330d2faa49e6bca4eea2d8c78489ce6e298e55b9b9fbaff9e84eb1b7c2ce
+size 3103607896
diff --git a/model-00009-of-000051.safetensors b/model-00009-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2d9ca15e49dc3a47e4d4a149f3eb7cbadf03feb
--- /dev/null
+++ b/model-00009-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c59e8651cd10ebe8debe65fc620e1ab68efab24018a8cb7e2e0d2520b929a98
+size 3103607896
diff --git a/model-00010-of-000051.safetensors b/model-00010-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cae939e1abbf8b37c29e8e1890db478cc0fcd343
--- /dev/null
+++ b/model-00010-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:348a826c4153084ed0195a2989083772ba28009cb8d2479a501bd9e3e5e26686
+size 3103607896
diff --git a/model-00011-of-000051.safetensors b/model-00011-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ecf0814736982645bd927effa746a9363067f17b
--- /dev/null
+++ b/model-00011-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3f60733b0617fdc21159ec975182a44a727ab292dd9d58d9ac7b400075b87a9
+size 3103608152
diff --git a/model-00012-of-000051.safetensors b/model-00012-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7c574842e52e013da0473bf58ebcedc9231f4eb4
--- /dev/null
+++ b/model-00012-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5536de41c6b14feb5df47b3be85e70a89337cf2480933eb947e1a1dc3d447fc7
+size 3103608152
diff --git a/model-00013-of-000051.safetensors b/model-00013-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3414f9cc8567f3e704568dcef5d988b6372eeded
--- /dev/null
+++ b/model-00013-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f02a6449935c211e5a4347c73b22763f127e78f74aa736fa5b1abbeb0aaa1c87
+size 3103608152
diff --git a/model-00014-of-000051.safetensors b/model-00014-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0bd04de09a9505ef2a626c79d1ee20d3e1d9c844
--- /dev/null
+++ b/model-00014-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f32dd5f9f4414ea611ee0bf5ed343b7604785194a8c50e4c8f57cbec41f8d68
+size 3103608152
diff --git a/model-00015-of-000051.safetensors b/model-00015-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6e3df622ffaab5c1a6498b335511a817cb2b5bbf
--- /dev/null
+++ b/model-00015-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4b7666e445966de147f4860a397f93f4b5c7635cbd469ac3c442af7ba939962
+size 3103608152
diff --git a/model-00016-of-000051.safetensors b/model-00016-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ab2173892da9de46f39dd3db7e51added214dcf1
--- /dev/null
+++ b/model-00016-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ecb4ad21311887f4c44eeb94b0cfd502821db1dc1d12611b5b652a8a84d4fbe
+size 3103608152
diff --git a/model-00017-of-000051.safetensors b/model-00017-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..94f62eac478127a42a62afcea9f41301b0d727ab
--- /dev/null
+++ b/model-00017-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db6908a54fdb8640a406a78c6fb68c035f9cd1982bf4a9853e102941a4f50a2b
+size 3103608152
diff --git a/model-00018-of-000051.safetensors b/model-00018-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4eccbec4c8ad554fc1917662127cc9af128edcf0
--- /dev/null
+++ b/model-00018-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10188bcb0c9ce4bd8aa8b4084ccd2bb4bb830f95507f5032fc6ac22168d676be
+size 3103608152
diff --git a/model-00019-of-000051.safetensors b/model-00019-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..563c7788258c5528d0a38a608e6eda1cce761cfc
--- /dev/null
+++ b/model-00019-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30d5babd023187eef88826a3b505a06e586fc458b7a2c5da481473f6210e889
+size 3103608152
diff --git a/model-00020-of-000051.safetensors b/model-00020-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..473c0315cce86135da1605773c20a79438be8934
--- /dev/null
+++ b/model-00020-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37bf95087b8495dd8065020ccc550d73bfbc99b12419c54dc902aeba8db9d1e6
+size 3103608152
diff --git a/model-00021-of-000051.safetensors b/model-00021-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37117d8a2ac5f5db5ae692f6afd06df44325e129
--- /dev/null
+++ b/model-00021-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8741ca6de1c6f1c5c2788023d9a8db0c82b503ec9fe49ba076f07d90c5de92ba
+size 3103608152
diff --git a/model-00022-of-000051.safetensors b/model-00022-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..478025654de069000b89cb4300ba2661507f0af4
--- /dev/null
+++ b/model-00022-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12a2c41a55ce55e98be091bc4c2333e4a7e69dee5d79fc222fd27ab30e3ce5cb
+size 3103608152
diff --git a/model-00023-of-000051.safetensors b/model-00023-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5f6c134344dba27ea4571914148d2cf0202f5f8a
--- /dev/null
+++ b/model-00023-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b6b91c53ce720c03347dad507730b16ecb9eb9d2bd6c468a91150cd9c23cd30
+size 3103608152
diff --git a/model-00024-of-000051.safetensors b/model-00024-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d867d3201b0d7face523ea71af76ab748e80bed8
--- /dev/null
+++ b/model-00024-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d441eaad36da0df2dc9825c6909b7b29d907cfd00089af1a007c45632e5e115
+size 3103608152
diff --git a/model-00025-of-000051.safetensors b/model-00025-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..61e08abbf090a851c1a0247915b6edf8ae04264f
--- /dev/null
+++ b/model-00025-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e8f54dbb2414b22a883c110920b7bd84771a2e2dbb78a85b759c7ece195bc6c
+size 3103608152
diff --git a/model-00026-of-000051.safetensors b/model-00026-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ab38456084e440f5cc609360fb7343992b2ae951
--- /dev/null
+++ b/model-00026-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf8a05cb3853131aaff5b1a2357cdd4462cde2d9e57ad35a5779c0f90df361e
+size 3103608152
diff --git a/model-00027-of-000051.safetensors b/model-00027-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..79d4e83fa252eec7034bc43c149883f29b5ac1de
--- /dev/null
+++ b/model-00027-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad286b9f5200c6de8a97c0eecccafafbbea7a710716abd0bad30dfc768b0650c
+size 3103608152
diff --git a/model-00028-of-000051.safetensors b/model-00028-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..369f1c6cd73f39418d2d40b7a1fede3e52a12d29
--- /dev/null
+++ b/model-00028-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:291d66cb2467c36e2642250c40445fdca3cadbfbbf784b66fbbd6356aa3a1a18
+size 3103608152
diff --git a/model-00029-of-000051.safetensors b/model-00029-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5392738accac9caf3dfaa2cc8faf691c7925c4b3
--- /dev/null
+++ b/model-00029-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f01876779fe5f926a1f4ad070e5cfbdb061dcd424272c35d3e5048254355fa13
+size 3103608152
diff --git a/model-00030-of-000051.safetensors b/model-00030-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c771947412f5876fd823148471a029f1d1ca4203
--- /dev/null
+++ b/model-00030-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:150c6e3f25cc165d23a8af45ee8859324267fb85825093472421849a2c994bcb
+size 3103608152
diff --git a/model-00031-of-000051.safetensors b/model-00031-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1812e020e776ee06db754ae221a735c1538d1706
--- /dev/null
+++ b/model-00031-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f83613272553ca7d84303dc88527ee273ba90244e6a8bda34df0b4ba2be337ed
+size 3103608152
diff --git a/model-00032-of-000051.safetensors b/model-00032-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c6333e8e775300750cb9455ce6c02337bfc70cbd
--- /dev/null
+++ b/model-00032-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bb600e04ab0220ad2cc50e7bb8025db75c4be9fc9478f356e393d11231ac5cb
+size 3103608152
diff --git a/model-00033-of-000051.safetensors b/model-00033-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..088fc0bf9b2d64d523b53690c59b29e80848f858
--- /dev/null
+++ b/model-00033-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b15a5bb9a59506a6ebb7224135be2c2f4816d0cbd16c096404d44dfed953712
+size 3103608152
diff --git a/model-00034-of-000051.safetensors b/model-00034-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b38faffee6276ef5a468a5eae33a31675dc47a2
--- /dev/null
+++ b/model-00034-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c87358d675516d50057fd7baf809508b14a69751805468ef7edd21f9b3b270b
+size 3103608152
diff --git a/model-00035-of-000051.safetensors b/model-00035-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5893bbcd2062744542606cc6fe4ebb40872ab93c
--- /dev/null
+++ b/model-00035-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:751356ba1cb106cb53f3416dcf1fad91cf799af400b9a4afa30c7a5b227b8839
+size 3103608152
diff --git a/model-00036-of-000051.safetensors b/model-00036-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ffeacdc2db2cee0d1dd57008626cd70a8ead9653
--- /dev/null
+++ b/model-00036-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46b580d5e2bc27651b57b1b310bbd8c053f4da6c044f756fb5db78368b1fcfa1
+size 3103608152
diff --git a/model-00037-of-000051.safetensors b/model-00037-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..32f58a7994eb84840e4742fe51d43cd43a851355
--- /dev/null
+++ b/model-00037-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e133139c8e3a80a675625abb4aebed19621c3aa7e5db072850441f2bb9a1c9
+size 3103608152
diff --git a/model-00038-of-000051.safetensors b/model-00038-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..defda3b0c908b2c80d5f6e3a159944f34bd3acd6
--- /dev/null
+++ b/model-00038-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:206234a10d3ea746595667909a6e6e0dc728d1de719f5bd126b1e54785603bdc
+size 3103608152
diff --git a/model-00039-of-000051.safetensors b/model-00039-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a925dba29f5da0f14ba298f355f26188411ed842
--- /dev/null
+++ b/model-00039-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add0f4a5dd9bc63f900f604587c8b00f1f089dc5a8a6b39d751abf98ba11741d
+size 3103608152
diff --git a/model-00040-of-000051.safetensors b/model-00040-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8e9af52aa0b0bdcd8a2a145f7f0fffb61a1d1ccf
--- /dev/null
+++ b/model-00040-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9bcf8b3349ccf97a88894e753ffb0232a41bfb78861e44fbc8037657cf4f3d3
+size 3103608152
diff --git a/model-00041-of-000051.safetensors b/model-00041-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a7b3bd0512e46b62253632d3dcb37f366e8247a9
--- /dev/null
+++ b/model-00041-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91a0b83bd660237b58b923942182c3d130f18d7f63b0d6a0ff70eea03555409d
+size 3103608152
diff --git a/model-00042-of-000051.safetensors b/model-00042-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..034e50c03af432e7c130f6cddf69fc193a83c080
--- /dev/null
+++ b/model-00042-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce999af670ed92a1b0826ec17f05d1e787c57c5bdfacaddd0386f58f7b1a58d2
+size 3103608152
diff --git a/model-00043-of-000051.safetensors b/model-00043-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bccdc61664f909247b033f55e7c03fda9875bc98
--- /dev/null
+++ b/model-00043-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d814324c61ee46c59f03425a22ba85831943b23f3ade9d5efe7dec0c8fc3851d
+size 3103608152
diff --git a/model-00044-of-000051.safetensors b/model-00044-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..198131b259f0d92efaded232ac3b16e816aea40f
--- /dev/null
+++ b/model-00044-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68128a3631b0c3c05580350b2b8fc6aacfc11adbf7e25a06df624071619aa026
+size 3103608152
diff --git a/model-00045-of-000051.safetensors b/model-00045-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..042486a0d2eaca8c87b11bcf9be3932763bbad3e
--- /dev/null
+++ b/model-00045-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1cda178dca892015d852b7f2cb2677f1980ea14d923af2352f4c2f8a0eb6574
+size 3103608152
diff --git a/model-00046-of-000051.safetensors b/model-00046-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9809bfb69126d38baab49bc60131e6f3727081d
--- /dev/null
+++ b/model-00046-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e25c7a05912871fa3874e270d8ca5c7259610e4b4f6f0b3e57306603b0ac1985
+size 3103608152
diff --git a/model-00047-of-000051.safetensors b/model-00047-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5923631b57423bd4c99a72b9abef18cef4b95d0f
--- /dev/null
+++ b/model-00047-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb75e10be2ff98a6caf93af1caee89281144e5b955414d1ef68ff9ac5fd428a
+size 3103608152
diff --git a/model-00048-of-000051.safetensors b/model-00048-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9515558fdf1129cbd084fa48ad6498bc3c5a4afb
--- /dev/null
+++ b/model-00048-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:750aa728a9a361c34841d55f6042c2da427183e8f320c6370939c0347bcf14b9
+size 3103608152
diff --git a/model-00049-of-000051.safetensors b/model-00049-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..56c558d4d895b160496a4291d6072531858c5f26
--- /dev/null
+++ b/model-00049-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70b10aff5a78536a3fdf10f00380eaa4dcab583376b62456e2615c049f272c85
+size 3103608152
diff --git a/model-00050-of-000051.safetensors b/model-00050-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5e6b562bb5429064136470578ed8a2ee773295f8
--- /dev/null
+++ b/model-00050-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c7e4217b571f525f50e0a27b699217619fcfc3a433962d8ef9255c4d086d6ce
+size 4519195360
diff --git a/model-00051-of-000051.safetensors b/model-00051-of-000051.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ad768975cf93d6fdd4d223ca02fcf826e8a982e
--- /dev/null
+++ b/model-00051-of-000051.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c414a63072a48e501b7b776959ea55a7532d75e1c7b00566282627ca699b13b
+size 6019726520
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..d77f32df01a098452c9befd2f4aa215f84734e82
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,12002 @@
+{
+ "metadata": {
+ "total_size": 153551464928
+ },
+ "weight_map": {
+ "model.embed_tokens.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.pre_mlp_layernorm.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.post_mlp_layernorm.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.self_attn.k_layernorm.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.self_attn.param_sink_key": "model-00001-of-000051.safetensors",
+ "model.layers.0.self_attn.param_sink_value": "model-00001-of-000051.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-000051.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-000051.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.pre_mlp_layernorm.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.post_mlp_layernorm.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.self_attn.qkv_proj.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.self_attn.k_layernorm.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.self_attn.param_sink_key": "model-00002-of-000051.safetensors",
+ "model.layers.1.self_attn.param_sink_value": "model-00002-of-000051.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-000051.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-000051.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.pre_mlp_layernorm.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.post_mlp_layernorm.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.self_attn.qkv_proj.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.self_attn.k_layernorm.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.self_attn.param_sink_key": "model-00003-of-000051.safetensors",
+ "model.layers.2.self_attn.param_sink_value": "model-00003-of-000051.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00003-of-000051.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00003-of-000051.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.pre_mlp_layernorm.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.post_mlp_layernorm.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.self_attn.qkv_proj.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.self_attn.k_layernorm.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.self_attn.param_sink_key": "model-00004-of-000051.safetensors",
+ "model.layers.3.self_attn.param_sink_value": "model-00004-of-000051.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00004-of-000051.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00004-of-000051.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.pre_mlp_layernorm.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.post_mlp_layernorm.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.self_attn.qkv_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.self_attn.k_layernorm.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.self_attn.param_sink_key": "model-00005-of-000051.safetensors",
+ "model.layers.4.self_attn.param_sink_value": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.gate.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.e_score_correction_bias": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.0.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.0.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.1.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.1.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.2.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.2.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.3.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.3.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.4.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.4.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.5.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.5.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.6.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.6.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.7.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.7.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.8.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.8.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.9.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.9.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.10.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.10.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.11.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.11.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.12.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.12.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.13.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.13.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.14.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.14.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.15.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.15.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.16.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.16.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.17.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.17.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.18.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.18.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.19.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.19.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.20.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.20.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.21.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.21.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.22.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.22.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.23.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.23.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.24.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.24.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.25.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.25.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.26.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.26.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.27.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.27.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.28.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.28.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.29.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.29.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.30.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.30.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.31.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.31.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.32.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.32.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.33.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.33.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.34.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.34.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.35.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.35.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.36.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.36.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.37.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.37.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.38.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.38.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.39.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.39.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.40.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.40.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.41.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.41.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.42.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.42.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.43.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.43.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.44.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.44.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.45.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.45.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.46.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.46.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.47.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.47.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.48.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.48.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.49.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.49.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.50.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.50.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.51.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.51.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.52.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.52.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.53.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.53.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.54.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.54.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.55.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.55.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.56.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.56.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.57.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.57.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.58.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.58.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.59.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.59.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.60.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.60.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.61.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.61.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.62.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.62.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.63.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.63.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.64.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.64.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.64.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.65.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.65.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.65.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.66.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.66.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.66.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.67.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.67.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.67.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.68.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.68.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.68.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.69.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.69.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.69.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.70.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.70.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.70.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.71.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.71.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.71.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.72.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.72.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.72.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.73.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.73.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.73.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.74.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.74.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.74.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.75.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.75.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.75.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.76.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.76.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.76.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.77.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.77.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.77.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.78.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.78.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.78.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.79.gate_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.79.up_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.4.mlp.experts.79.down_proj.weight": "model-00005-of-000051.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.pre_mlp_layernorm.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.post_mlp_layernorm.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.self_attn.qkv_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.self_attn.k_layernorm.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.self_attn.param_sink_key": "model-00006-of-000051.safetensors",
+ "model.layers.5.self_attn.param_sink_value": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.gate.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.e_score_correction_bias": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.0.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.0.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.1.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.1.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.2.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.2.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.3.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.3.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.4.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.4.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.5.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.5.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.6.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.6.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.7.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.7.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.8.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.8.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.9.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.9.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.10.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.10.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.11.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.11.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.12.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.12.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.13.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.13.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.14.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.14.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.15.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.15.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.16.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.16.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.17.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.17.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.18.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.18.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.19.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.19.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.20.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.20.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.21.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.21.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.22.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.22.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.23.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.23.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.24.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.24.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.25.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.25.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.26.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.26.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.27.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.27.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.28.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.28.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.29.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.29.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.30.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.30.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.31.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.31.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.32.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.32.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.33.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.33.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.34.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.34.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.35.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.35.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.36.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.36.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.37.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.37.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.38.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.38.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.39.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.39.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.40.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.40.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.41.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.41.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.42.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.42.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.43.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.43.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.44.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.44.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.45.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.45.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.46.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.46.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.47.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.47.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.48.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.48.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.49.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.49.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.50.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.50.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.51.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.51.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.52.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.52.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.53.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.53.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.54.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.54.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.55.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.55.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.56.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.56.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.57.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.57.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.58.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.58.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.59.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.59.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.60.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.60.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.61.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.61.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.62.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.62.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.63.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.63.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.64.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.64.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.64.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.65.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.65.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.65.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.66.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.66.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.66.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.67.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.67.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.67.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.68.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.68.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.68.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.69.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.69.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.69.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.70.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.70.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.70.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.71.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.71.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.71.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.72.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.72.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.72.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.73.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.73.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.73.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.74.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.74.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.74.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.75.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.75.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.75.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.76.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.76.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.76.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.77.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.77.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.77.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.78.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.78.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.78.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.79.gate_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.79.up_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.5.mlp.experts.79.down_proj.weight": "model-00006-of-000051.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.pre_mlp_layernorm.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.post_mlp_layernorm.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.self_attn.qkv_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.self_attn.k_layernorm.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.self_attn.param_sink_key": "model-00007-of-000051.safetensors",
+ "model.layers.6.self_attn.param_sink_value": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.gate.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.e_score_correction_bias": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.0.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.0.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.1.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.1.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.2.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.2.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.3.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.3.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.4.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.4.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.5.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.5.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.6.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.6.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.7.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.7.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.8.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.8.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.9.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.9.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.10.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.10.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.11.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.11.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.12.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.12.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.13.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.13.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.14.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.14.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.15.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.15.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.16.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.16.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.17.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.17.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.18.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.18.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.19.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.19.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.20.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.20.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.21.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.21.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.22.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.22.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.23.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.23.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.24.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.24.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.25.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.25.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.26.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.26.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.27.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.27.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.28.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.28.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.29.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.29.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.30.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.30.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.31.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.31.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.32.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.32.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.33.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.33.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.34.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.34.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.35.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.35.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.36.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.36.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.37.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.37.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.38.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.38.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.39.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.39.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.40.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.40.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.41.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.41.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.42.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.42.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.43.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.43.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.44.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.44.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.45.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.45.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.46.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.46.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.47.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.47.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.48.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.48.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.49.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.49.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.50.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.50.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.51.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.51.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.52.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.52.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.53.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.53.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.54.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.54.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.55.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.55.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.56.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.56.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.57.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.57.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.58.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.58.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.59.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.59.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.60.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.60.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.61.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.61.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.62.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.62.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.63.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.63.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.64.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.64.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.64.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.65.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.65.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.65.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.66.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.66.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.66.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.67.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.67.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.67.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.68.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.68.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.68.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.69.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.69.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.69.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.70.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.70.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.70.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.71.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.71.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.71.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.72.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.72.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.72.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.73.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.73.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.73.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.74.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.74.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.74.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.75.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.75.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.75.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.76.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.76.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.76.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.77.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.77.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.77.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.78.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.78.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.78.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.79.gate_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.79.up_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.6.mlp.experts.79.down_proj.weight": "model-00007-of-000051.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.pre_mlp_layernorm.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.post_mlp_layernorm.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.self_attn.qkv_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.self_attn.k_layernorm.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.self_attn.param_sink_key": "model-00008-of-000051.safetensors",
+ "model.layers.7.self_attn.param_sink_value": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.gate.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.e_score_correction_bias": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.0.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.0.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.1.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.1.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.2.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.2.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.3.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.3.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.4.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.4.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.5.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.5.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.6.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.6.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.7.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.7.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.8.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.8.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.9.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.9.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.10.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.10.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.11.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.11.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.12.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.12.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.13.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.13.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.14.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.14.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.15.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.15.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.16.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.16.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.17.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.17.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.18.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.18.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.19.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.19.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.20.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.20.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.21.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.21.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.22.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.22.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.23.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.23.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.24.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.24.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.25.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.25.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.26.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.26.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.27.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.27.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.28.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.28.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.29.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.29.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.30.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.30.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.31.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.31.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.32.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.32.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.33.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.33.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.34.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.34.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.35.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.35.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.36.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.36.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.37.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.37.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.38.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.38.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.39.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.39.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.40.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.40.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.41.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.41.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.42.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.42.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.43.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.43.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.44.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.44.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.45.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.45.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.46.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.46.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.47.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.47.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.48.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.48.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.49.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.49.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.50.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.50.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.51.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.51.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.52.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.52.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.53.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.53.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.54.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.54.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.55.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.55.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.56.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.56.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.57.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.57.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.58.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.58.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.59.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.59.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.60.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.60.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.61.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.61.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.62.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.62.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.63.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.63.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.64.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.64.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.65.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.65.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.65.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.66.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.66.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.66.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.67.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.67.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.67.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.68.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.68.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.68.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.69.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.69.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.69.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.70.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.70.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.70.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.71.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.71.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.71.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.72.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.72.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.72.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.73.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.73.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.73.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.74.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.74.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.74.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.75.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.75.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.75.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.76.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.76.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.76.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.77.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.77.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.77.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.78.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.78.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.78.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.79.gate_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.79.up_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.7.mlp.experts.79.down_proj.weight": "model-00008-of-000051.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.pre_mlp_layernorm.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.post_mlp_layernorm.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.self_attn.qkv_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.self_attn.k_layernorm.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.self_attn.param_sink_key": "model-00009-of-000051.safetensors",
+ "model.layers.8.self_attn.param_sink_value": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.gate.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.e_score_correction_bias": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.0.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.0.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.1.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.1.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.2.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.2.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.3.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.3.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.4.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.4.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.5.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.5.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.6.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.6.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.7.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.7.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.8.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.8.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.9.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.9.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.10.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.10.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.11.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.11.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.12.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.12.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.13.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.13.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.14.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.14.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.15.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.15.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.16.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.16.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.17.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.17.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.18.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.18.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.19.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.19.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.20.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.20.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.21.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.21.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.22.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.22.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.23.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.23.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.24.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.24.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.25.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.25.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.26.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.26.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.27.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.27.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.28.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.28.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.29.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.29.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.30.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.30.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.31.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.31.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.32.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.32.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.33.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.33.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.34.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.34.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.35.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.35.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.36.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.36.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.37.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.37.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.38.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.38.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.39.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.39.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.40.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.40.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.41.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.41.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.42.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.42.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.43.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.43.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.44.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.44.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.45.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.45.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.46.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.46.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.47.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.47.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.48.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.48.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.49.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.49.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.50.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.50.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.51.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.51.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.52.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.52.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.53.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.53.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.54.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.54.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.55.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.55.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.56.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.56.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.57.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.57.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.58.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.58.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.59.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.59.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.60.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.60.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.61.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.61.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.62.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.62.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.63.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.63.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.64.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.64.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.64.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.65.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.65.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.65.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.66.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.66.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.66.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.67.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.67.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.67.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.68.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.68.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.68.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.69.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.69.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.69.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.70.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.70.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.70.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.71.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.71.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.71.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.72.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.72.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.72.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.73.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.73.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.73.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.74.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.74.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.74.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.75.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.75.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.75.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.76.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.76.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.76.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.77.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.77.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.77.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.78.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.78.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.78.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.79.gate_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.79.up_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.8.mlp.experts.79.down_proj.weight": "model-00009-of-000051.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.pre_mlp_layernorm.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.post_mlp_layernorm.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.self_attn.qkv_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.self_attn.k_layernorm.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.self_attn.param_sink_key": "model-00010-of-000051.safetensors",
+ "model.layers.9.self_attn.param_sink_value": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.gate.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.e_score_correction_bias": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.0.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.0.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.1.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.1.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.2.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.2.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.3.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.3.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.4.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.4.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.5.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.5.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.6.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.6.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.7.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.7.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.8.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.8.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.9.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.9.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.10.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.10.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.11.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.11.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.12.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.12.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.13.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.13.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.14.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.14.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.15.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.15.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.16.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.16.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.17.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.17.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.18.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.18.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.19.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.19.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.20.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.20.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.21.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.21.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.22.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.22.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.23.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.23.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.24.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.24.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.25.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.25.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.26.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.26.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.27.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.27.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.28.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.28.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.29.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.29.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.30.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.30.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.31.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.31.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.32.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.32.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.33.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.33.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.34.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.34.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.35.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.35.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.36.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.36.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.37.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.37.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.38.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.38.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.39.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.39.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.40.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.40.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.41.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.41.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.42.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.42.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.43.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.43.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.44.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.44.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.45.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.45.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.46.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.46.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.47.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.47.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.48.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.48.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.49.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.49.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.50.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.50.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.51.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.51.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.52.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.52.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.53.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.53.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.54.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.54.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.55.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.55.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.56.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.56.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.57.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.57.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.58.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.58.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.59.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.59.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.60.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.60.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.61.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.61.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.62.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.62.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.63.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.63.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.64.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.64.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.64.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.65.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.65.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.65.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.66.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.66.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.66.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.67.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.67.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.67.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.68.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.68.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.68.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.69.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.69.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.69.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.70.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.70.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.70.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.71.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.71.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.71.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.72.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.72.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.72.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.73.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.73.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.73.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.74.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.74.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.74.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.75.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.75.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.75.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.76.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.76.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.76.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.77.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.77.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.77.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.78.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.78.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.78.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.79.gate_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.79.up_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.9.mlp.experts.79.down_proj.weight": "model-00010-of-000051.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.pre_mlp_layernorm.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.post_mlp_layernorm.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.self_attn.qkv_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.self_attn.k_layernorm.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.self_attn.param_sink_key": "model-00011-of-000051.safetensors",
+ "model.layers.10.self_attn.param_sink_value": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.gate.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.e_score_correction_bias": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.0.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.0.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.1.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.1.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.2.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.2.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.3.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.3.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.4.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.4.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.5.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.5.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.6.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.6.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.7.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.7.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.8.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.8.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.9.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.9.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.10.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.10.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.11.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.11.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.12.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.12.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.13.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.13.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.14.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.14.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.15.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.15.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.16.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.16.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.17.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.17.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.18.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.18.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.19.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.19.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.20.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.20.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.21.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.21.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.22.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.22.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.23.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.23.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.24.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.24.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.25.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.25.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.26.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.26.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.27.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.27.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.28.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.28.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.29.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.29.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.30.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.30.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.31.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.31.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.32.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.32.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.33.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.33.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.34.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.34.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.35.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.35.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.36.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.36.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.37.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.37.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.38.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.38.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.39.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.39.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.40.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.40.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.41.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.41.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.42.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.42.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.43.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.43.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.44.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.44.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.45.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.45.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.46.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.46.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.47.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.47.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.48.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.48.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.49.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.49.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.50.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.50.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.51.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.51.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.52.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.52.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.53.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.53.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.54.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.54.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.55.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.55.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.56.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.56.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.57.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.57.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.58.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.58.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.59.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.59.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.60.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.60.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.61.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.61.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.62.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.62.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.63.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.63.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.64.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.64.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.64.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.65.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.65.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.65.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.66.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.66.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.66.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.67.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.67.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.67.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.68.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.68.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.68.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.69.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.69.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.69.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.70.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.70.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.70.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.71.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.71.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.71.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.72.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.72.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.72.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.73.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.73.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.73.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.74.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.74.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.74.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.75.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.75.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.75.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.76.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.76.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.76.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.77.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.77.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.77.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.78.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.78.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.78.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.79.gate_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.79.up_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.10.mlp.experts.79.down_proj.weight": "model-00011-of-000051.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.pre_mlp_layernorm.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.post_mlp_layernorm.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.self_attn.qkv_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.self_attn.k_layernorm.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.self_attn.param_sink_key": "model-00012-of-000051.safetensors",
+ "model.layers.11.self_attn.param_sink_value": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.gate.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.e_score_correction_bias": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.0.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.0.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.1.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.1.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.2.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.2.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.3.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.3.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.4.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.4.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.5.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.5.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.6.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.6.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.7.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.7.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.8.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.8.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.9.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.9.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.10.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.10.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.11.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.11.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.12.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.12.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.13.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.13.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.14.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.14.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.15.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.15.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.16.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.16.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.17.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.17.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.18.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.18.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.19.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.19.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.20.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.20.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.21.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.21.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.22.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.22.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.23.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.23.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.24.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.24.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.25.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.25.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.26.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.26.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.27.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.27.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.28.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.28.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.29.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.29.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.30.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.30.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.31.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.31.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.32.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.32.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.33.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.33.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.34.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.34.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.35.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.35.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.36.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.36.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.37.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.37.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.38.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.38.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.39.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.39.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.40.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.40.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.41.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.41.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.42.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.42.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.43.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.43.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.44.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.44.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.45.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.45.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.46.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.46.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.47.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.47.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.48.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.48.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.49.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.49.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.50.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.50.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.51.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.51.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.52.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.52.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.53.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.53.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.54.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.54.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.55.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.55.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.56.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.56.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.57.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.57.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.58.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.58.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.59.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.59.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.60.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.60.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.61.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.61.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.62.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.62.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.63.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.63.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.64.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.64.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.64.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.65.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.65.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.66.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.66.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.67.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.67.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.67.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.68.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.68.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.68.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.69.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.69.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.69.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.70.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.70.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.70.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.71.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.71.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.71.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.72.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.72.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.72.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.73.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.73.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.73.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.74.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.74.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.74.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.75.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.75.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.75.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.76.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.76.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.76.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.77.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.77.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.77.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.78.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.78.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.78.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.79.gate_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.79.up_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.11.mlp.experts.79.down_proj.weight": "model-00012-of-000051.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.pre_mlp_layernorm.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.post_mlp_layernorm.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.self_attn.qkv_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.self_attn.k_layernorm.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.self_attn.param_sink_key": "model-00013-of-000051.safetensors",
+ "model.layers.12.self_attn.param_sink_value": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.gate.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.e_score_correction_bias": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.0.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.0.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.1.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.1.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.2.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.2.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.3.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.3.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.4.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.4.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.5.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.5.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.6.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.6.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.7.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.7.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.8.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.8.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.9.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.9.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.10.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.10.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.11.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.11.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.12.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.12.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.13.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.13.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.14.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.14.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.15.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.15.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.16.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.16.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.17.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.17.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.18.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.18.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.19.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.19.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.20.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.20.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.21.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.21.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.22.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.22.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.23.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.23.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.24.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.24.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.25.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.25.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.26.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.26.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.27.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.27.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.28.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.28.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.29.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.29.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.30.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.30.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.31.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.31.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.32.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.32.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.33.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.33.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.34.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.34.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.35.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.35.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.36.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.36.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.37.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.37.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.38.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.38.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.39.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.39.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.40.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.40.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.41.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.41.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.42.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.42.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.43.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.43.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.44.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.44.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.45.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.45.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.46.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.46.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.47.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.47.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.48.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.48.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.49.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.49.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.50.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.50.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.51.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.51.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.52.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.52.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.53.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.53.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.54.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.54.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.55.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.55.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.56.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.56.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.57.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.57.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.58.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.58.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.59.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.59.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.60.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.60.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.61.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.61.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.62.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.62.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.63.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.63.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.64.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.64.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.64.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.65.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.65.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.65.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.66.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.66.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.66.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.67.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.67.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.67.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.68.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.68.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.68.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.69.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.69.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.69.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.70.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.70.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.70.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.71.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.71.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.71.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.72.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.72.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.72.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.73.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.73.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.73.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.74.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.74.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.74.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.75.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.75.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.75.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.76.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.76.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.76.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.77.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.77.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.77.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.78.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.78.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.78.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.79.gate_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.79.up_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.12.mlp.experts.79.down_proj.weight": "model-00013-of-000051.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.pre_mlp_layernorm.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.post_mlp_layernorm.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.self_attn.qkv_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.self_attn.k_layernorm.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.self_attn.param_sink_key": "model-00014-of-000051.safetensors",
+ "model.layers.13.self_attn.param_sink_value": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.gate.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.e_score_correction_bias": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.0.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.0.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.1.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.1.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.2.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.2.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.3.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.3.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.4.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.4.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.5.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.5.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.6.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.6.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.7.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.7.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.8.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.8.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.9.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.9.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.10.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.10.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.11.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.11.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.12.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.12.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.13.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.13.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.14.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.14.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.15.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.15.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.16.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.16.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.17.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.17.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.18.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.18.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.19.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.19.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.20.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.20.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.21.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.21.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.22.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.22.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.23.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.23.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.24.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.24.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.25.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.25.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.26.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.26.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.27.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.27.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.28.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.28.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.29.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.29.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.30.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.30.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.31.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.31.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.32.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.32.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.33.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.33.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.34.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.34.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.35.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.35.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.36.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.36.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.37.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.37.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.38.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.38.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.39.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.39.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.40.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.40.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.41.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.41.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.42.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.42.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.43.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.43.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.44.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.44.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.45.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.45.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.46.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.46.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.47.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.47.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.48.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.48.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.49.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.49.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.50.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.50.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.51.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.51.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.52.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.52.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.53.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.53.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.54.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.54.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.55.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.55.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.56.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.56.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.57.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.57.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.58.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.58.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.59.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.59.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.60.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.60.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.61.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.61.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.62.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.62.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.63.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.63.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.64.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.64.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.64.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.65.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.65.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.65.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.66.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.66.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.66.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.67.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.67.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.67.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.68.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.68.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.68.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.69.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.69.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.69.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.70.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.70.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.70.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.71.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.71.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.71.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.72.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.72.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.72.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.73.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.73.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.73.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.74.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.74.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.74.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.75.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.75.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.75.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.76.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.76.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.76.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.77.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.77.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.77.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.78.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.78.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.78.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.79.gate_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.79.up_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.13.mlp.experts.79.down_proj.weight": "model-00014-of-000051.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.pre_mlp_layernorm.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.post_mlp_layernorm.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.self_attn.qkv_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.self_attn.k_layernorm.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.self_attn.param_sink_key": "model-00015-of-000051.safetensors",
+ "model.layers.14.self_attn.param_sink_value": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.gate.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.e_score_correction_bias": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.0.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.0.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.1.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.1.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.2.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.2.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.3.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.3.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.4.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.4.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.5.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.5.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.6.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.6.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.7.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.7.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.8.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.8.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.9.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.9.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.10.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.10.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.11.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.11.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.12.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.12.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.13.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.13.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.14.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.14.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.15.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.15.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.16.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.16.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.17.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.17.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.18.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.18.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.19.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.19.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.20.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.20.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.21.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.21.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.22.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.22.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.23.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.23.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.24.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.24.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.25.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.25.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.26.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.26.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.27.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.27.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.28.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.28.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.29.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.29.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.30.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.30.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.31.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.31.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.32.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.32.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.33.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.33.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.34.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.34.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.35.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.35.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.36.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.36.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.37.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.37.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.38.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.38.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.39.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.39.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.40.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.40.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.41.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.41.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.42.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.42.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.43.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.43.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.44.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.44.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.45.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.45.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.46.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.46.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.47.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.47.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.48.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.48.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.49.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.49.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.50.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.50.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.51.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.51.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.52.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.52.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.53.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.53.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.54.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.54.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.55.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.55.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.56.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.56.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.57.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.57.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.58.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.58.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.59.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.59.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.60.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.60.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.61.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.61.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.62.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.62.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.63.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.63.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.64.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.64.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.64.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.65.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.65.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.65.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.66.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.66.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.66.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.67.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.67.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.67.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.68.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.68.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.68.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.69.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.69.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.69.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.70.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.70.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.70.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.71.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.71.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.71.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.72.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.72.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.72.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.73.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.73.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.73.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.74.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.74.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.74.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.75.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.75.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.75.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.76.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.76.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.76.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.77.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.77.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.77.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.78.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.78.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.78.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.79.gate_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.79.up_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.14.mlp.experts.79.down_proj.weight": "model-00015-of-000051.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.pre_mlp_layernorm.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.post_mlp_layernorm.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.self_attn.qkv_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.self_attn.k_layernorm.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.self_attn.param_sink_key": "model-00016-of-000051.safetensors",
+ "model.layers.15.self_attn.param_sink_value": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.gate.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.e_score_correction_bias": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.0.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.0.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.1.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.1.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.2.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.2.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.3.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.3.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.4.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.4.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.5.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.5.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.6.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.6.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.7.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.7.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.8.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.8.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.9.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.9.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.10.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.10.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.11.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.11.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.12.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.12.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.13.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.13.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.14.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.14.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.15.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.15.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.16.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.16.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.17.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.17.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.18.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.18.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.19.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.19.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.20.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.20.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.21.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.21.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.22.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.22.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.23.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.23.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.24.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.24.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.25.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.25.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.26.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.26.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.27.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.27.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.28.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.28.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.29.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.29.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.30.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.30.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.31.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.31.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.32.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.32.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.33.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.33.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.34.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.34.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.35.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.35.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.36.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.36.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.37.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.37.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.38.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.38.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.39.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.39.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.40.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.40.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.41.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.41.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.42.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.42.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.43.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.43.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.44.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.44.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.45.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.45.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.46.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.46.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.47.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.47.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.48.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.48.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.49.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.49.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.50.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.50.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.51.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.51.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.52.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.52.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.53.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.53.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.54.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.54.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.55.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.55.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.56.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.56.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.57.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.57.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.58.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.58.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.59.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.59.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.60.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.60.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.61.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.61.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.62.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.62.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.63.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.63.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.64.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.64.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.64.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.65.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.65.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.65.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.66.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.66.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.66.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.67.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.67.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.67.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.68.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.68.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.69.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.69.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.69.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.70.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.70.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.70.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.71.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.71.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.71.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.72.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.72.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.72.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.73.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.73.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.73.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.74.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.74.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.74.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.75.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.75.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.75.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.76.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.76.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.76.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.77.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.77.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.77.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.78.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.78.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.78.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.79.gate_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.79.up_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.15.mlp.experts.79.down_proj.weight": "model-00016-of-000051.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.pre_mlp_layernorm.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.post_mlp_layernorm.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.self_attn.qkv_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.self_attn.k_layernorm.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.self_attn.param_sink_key": "model-00017-of-000051.safetensors",
+ "model.layers.16.self_attn.param_sink_value": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.gate.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.e_score_correction_bias": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.0.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.0.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.1.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.1.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.2.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.2.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.3.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.3.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.4.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.4.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.5.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.5.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.6.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.6.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.7.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.7.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.8.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.8.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.9.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.9.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.10.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.10.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.11.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.11.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.12.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.12.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.13.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.13.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.14.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.14.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.15.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.15.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.16.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.16.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.17.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.17.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.18.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.18.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.19.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.19.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.20.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.20.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.21.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.21.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.22.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.22.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.23.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.23.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.24.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.24.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.25.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.25.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.26.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.26.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.27.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.27.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.28.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.28.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.29.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.29.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.30.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.30.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.31.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.31.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.32.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.32.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.33.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.33.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.34.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.34.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.35.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.35.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.36.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.36.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.37.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.37.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.38.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.38.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.39.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.39.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.40.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.40.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.41.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.41.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.42.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.42.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.43.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.43.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.44.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.44.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.45.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.45.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.46.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.46.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.47.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.47.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.48.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.48.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.49.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.49.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.50.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.50.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.51.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.51.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.52.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.52.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.53.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.53.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.54.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.54.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.55.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.55.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.56.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.56.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.57.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.57.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.58.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.58.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.59.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.59.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.60.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.60.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.61.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.61.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.62.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.62.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.63.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.63.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.64.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.64.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.64.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.65.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.65.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.65.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.66.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.66.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.66.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.67.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.67.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.67.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.68.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.68.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.68.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.69.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.69.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.69.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.70.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.70.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.70.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.71.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.71.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.71.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.72.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.72.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.72.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.73.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.73.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.73.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.74.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.74.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.74.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.75.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.75.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.75.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.76.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.76.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.76.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.77.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.77.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.77.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.78.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.78.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.78.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.79.gate_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.79.up_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.16.mlp.experts.79.down_proj.weight": "model-00017-of-000051.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.pre_mlp_layernorm.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.post_mlp_layernorm.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.self_attn.qkv_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.self_attn.k_layernorm.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.self_attn.param_sink_key": "model-00018-of-000051.safetensors",
+ "model.layers.17.self_attn.param_sink_value": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.gate.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.e_score_correction_bias": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.0.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.0.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.1.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.1.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.2.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.2.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.3.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.3.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.4.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.4.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.5.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.5.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.6.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.6.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.7.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.7.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.8.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.8.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.9.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.9.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.10.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.10.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.11.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.11.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.12.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.12.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.13.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.13.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.14.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.14.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.15.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.15.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.16.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.16.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.17.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.17.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.18.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.18.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.19.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.19.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.20.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.20.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.21.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.21.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.22.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.22.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.23.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.23.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.24.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.24.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.25.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.25.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.26.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.26.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.27.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.27.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.28.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.28.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.29.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.29.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.30.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.30.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.31.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.31.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.32.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.32.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.33.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.33.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.34.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.34.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.35.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.35.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.36.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.36.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.37.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.37.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.38.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.38.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.39.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.39.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.40.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.40.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.41.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.41.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.42.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.42.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.43.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.43.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.44.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.44.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.45.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.45.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.46.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.46.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.47.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.47.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.48.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.48.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.49.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.49.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.50.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.50.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.51.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.51.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.52.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.52.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.53.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.53.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.54.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.54.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.55.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.55.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.56.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.56.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.57.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.57.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.58.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.58.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.59.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.59.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.60.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.60.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.61.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.61.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.62.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.62.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.63.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.63.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.64.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.64.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.64.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.65.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.65.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.65.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.66.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.66.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.66.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.67.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.67.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.67.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.68.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.68.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.68.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.69.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.69.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.69.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.70.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.70.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.70.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.71.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.71.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.71.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.72.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.72.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.72.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.73.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.73.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.73.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.74.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.74.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.74.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.75.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.75.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.75.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.76.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.76.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.76.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.77.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.77.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.77.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.78.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.78.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.78.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.79.gate_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.79.up_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.17.mlp.experts.79.down_proj.weight": "model-00018-of-000051.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.pre_mlp_layernorm.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.post_mlp_layernorm.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.self_attn.qkv_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.self_attn.k_layernorm.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.self_attn.param_sink_key": "model-00019-of-000051.safetensors",
+ "model.layers.18.self_attn.param_sink_value": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.gate.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.e_score_correction_bias": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.0.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.0.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.1.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.1.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.2.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.2.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.3.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.3.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.4.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.4.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.5.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.5.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.6.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.6.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.7.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.7.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.8.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.8.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.9.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.9.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.10.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.10.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.11.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.11.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.12.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.12.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.13.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.13.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.14.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.14.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.15.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.15.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.16.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.16.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.17.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.17.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.18.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.18.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.19.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.19.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.20.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.20.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.21.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.21.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.22.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.22.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.23.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.23.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.24.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.24.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.25.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.25.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.26.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.26.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.27.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.27.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.28.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.28.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.29.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.29.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.30.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.30.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.31.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.31.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.32.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.32.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.33.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.33.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.34.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.34.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.35.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.35.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.36.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.36.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.37.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.37.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.38.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.38.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.39.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.39.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.40.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.40.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.41.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.41.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.42.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.42.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.43.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.43.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.44.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.44.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.45.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.45.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.46.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.46.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.47.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.47.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.48.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.48.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.49.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.49.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.50.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.50.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.51.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.51.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.52.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.52.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.53.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.53.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.54.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.54.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.55.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.55.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.56.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.56.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.57.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.57.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.58.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.58.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.59.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.59.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.60.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.60.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.61.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.61.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.62.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.62.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.63.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.63.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.64.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.64.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.64.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.65.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.65.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.65.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.66.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.66.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.66.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.67.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.67.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.67.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.68.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.68.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.68.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.69.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.69.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.69.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.70.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.70.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.70.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.71.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.71.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.71.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.72.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.72.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.72.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.73.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.73.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.73.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.74.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.74.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.74.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.75.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.75.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.75.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.76.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.76.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.76.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.77.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.77.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.77.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.78.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.78.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.78.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.79.gate_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.79.up_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.18.mlp.experts.79.down_proj.weight": "model-00019-of-000051.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.pre_mlp_layernorm.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.post_mlp_layernorm.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.self_attn.qkv_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.self_attn.k_layernorm.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.self_attn.param_sink_key": "model-00020-of-000051.safetensors",
+ "model.layers.19.self_attn.param_sink_value": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.gate.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.e_score_correction_bias": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.0.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.0.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.1.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.1.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.2.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.2.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.3.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.3.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.4.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.4.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.5.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.5.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.6.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.6.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.7.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.7.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.8.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.8.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.9.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.9.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.10.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.10.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.11.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.11.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.12.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.12.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.13.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.13.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.14.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.14.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.15.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.15.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.16.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.16.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.17.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.17.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.18.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.18.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.19.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.19.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.20.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.20.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.21.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.21.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.22.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.22.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.23.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.23.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.24.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.24.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.25.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.25.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.26.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.26.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.27.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.27.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.28.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.28.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.29.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.29.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.30.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.30.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.31.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.31.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.32.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.32.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.33.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.33.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.34.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.34.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.35.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.35.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.36.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.36.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.37.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.37.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.38.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.38.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.39.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.39.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.40.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.40.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.41.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.41.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.42.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.42.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.43.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.43.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.44.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.44.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.45.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.45.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.46.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.46.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.47.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.47.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.48.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.48.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.49.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.49.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.50.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.50.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.51.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.51.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.52.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.52.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.53.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.53.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.54.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.54.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.55.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.55.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.56.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.56.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.57.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.57.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.58.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.58.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.59.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.59.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.60.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.60.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.61.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.61.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.62.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.62.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.63.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.63.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.64.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.64.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.64.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.65.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.65.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.65.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.66.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.66.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.66.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.67.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.67.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.67.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.68.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.68.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.69.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.69.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.69.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.70.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.70.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.70.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.71.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.71.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.71.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.72.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.72.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.72.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.73.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.73.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.73.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.74.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.74.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.74.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.75.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.75.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.75.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.76.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.76.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.76.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.77.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.77.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.77.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.78.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.78.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.78.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.79.gate_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.79.up_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.19.mlp.experts.79.down_proj.weight": "model-00020-of-000051.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.pre_mlp_layernorm.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.post_mlp_layernorm.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.self_attn.qkv_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.self_attn.k_layernorm.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.self_attn.param_sink_key": "model-00021-of-000051.safetensors",
+ "model.layers.20.self_attn.param_sink_value": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.gate.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.e_score_correction_bias": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.0.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.0.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.1.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.1.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.2.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.2.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.3.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.3.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.4.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.4.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.5.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.5.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.6.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.6.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.7.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.7.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.8.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.8.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.9.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.9.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.10.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.10.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.11.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.11.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.12.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.12.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.13.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.13.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.14.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.14.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.15.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.15.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.16.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.16.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.17.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.17.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.18.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.18.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.19.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.19.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.20.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.20.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.21.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.21.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.22.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.22.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.23.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.23.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.24.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.24.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.25.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.25.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.26.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.26.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.27.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.27.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.28.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.28.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.29.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.29.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.30.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.30.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.31.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.31.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.32.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.32.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.33.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.33.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.34.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.34.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.35.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.35.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.36.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.36.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.37.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.37.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.38.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.38.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.39.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.39.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.40.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.40.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.41.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.41.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.42.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.42.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.43.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.43.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.44.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.44.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.45.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.45.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.46.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.46.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.47.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.47.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.48.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.48.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.49.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.49.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.50.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.50.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.51.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.51.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.52.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.52.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.53.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.53.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.54.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.54.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.55.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.55.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.56.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.56.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.57.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.57.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.58.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.58.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.59.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.59.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.60.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.60.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.61.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.61.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.62.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.62.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.63.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.63.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.64.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.64.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.64.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.65.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.65.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.65.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.66.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.66.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.66.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.67.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.67.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.67.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.68.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.68.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.68.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.69.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.69.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.69.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.70.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.70.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.70.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.71.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.71.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.71.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.72.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.72.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.72.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.73.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.73.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.73.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.74.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.74.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.74.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.75.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.75.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.75.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.76.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.76.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.76.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.77.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.77.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.77.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.78.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.78.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.78.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.79.gate_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.79.up_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.20.mlp.experts.79.down_proj.weight": "model-00021-of-000051.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.pre_mlp_layernorm.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.post_mlp_layernorm.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.self_attn.qkv_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.self_attn.k_layernorm.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.self_attn.param_sink_key": "model-00022-of-000051.safetensors",
+ "model.layers.21.self_attn.param_sink_value": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.gate.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.e_score_correction_bias": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.0.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.0.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.1.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.1.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.2.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.2.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.3.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.3.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.4.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.4.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.5.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.5.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.6.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.6.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.7.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.7.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.8.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.8.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.9.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.9.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.10.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.10.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.11.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.11.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.12.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.12.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.13.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.13.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.14.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.14.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.15.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.15.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.16.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.16.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.17.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.17.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.18.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.18.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.19.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.19.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.20.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.20.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.21.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.21.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.22.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.22.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.23.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.23.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.24.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.24.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.25.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.25.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.26.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.26.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.27.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.27.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.28.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.28.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.29.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.29.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.30.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.30.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.31.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.31.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.32.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.32.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.33.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.33.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.34.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.34.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.35.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.35.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.36.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.36.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.37.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.37.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.38.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.38.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.39.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.39.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.40.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.40.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.41.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.41.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.42.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.42.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.43.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.43.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.44.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.44.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.45.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.45.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.46.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.46.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.47.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.47.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.48.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.48.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.49.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.49.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.50.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.50.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.51.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.51.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.52.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.52.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.53.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.53.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.54.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.54.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.55.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.55.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.56.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.56.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.57.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.57.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.58.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.58.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.59.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.59.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.60.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.60.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.61.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.61.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.62.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.62.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.63.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.63.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.64.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.64.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.64.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.65.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.65.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.65.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.66.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.66.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.66.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.67.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.67.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.67.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.68.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.68.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.68.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.69.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.69.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.69.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.70.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.70.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.70.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.71.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.71.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.71.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.72.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.72.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.72.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.73.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.73.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.73.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.74.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.74.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.74.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.75.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.75.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.75.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.76.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.76.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.76.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.77.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.77.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.77.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.78.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.78.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.78.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.79.gate_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.79.up_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.21.mlp.experts.79.down_proj.weight": "model-00022-of-000051.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.pre_mlp_layernorm.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.post_mlp_layernorm.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.self_attn.qkv_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.self_attn.k_layernorm.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.self_attn.param_sink_key": "model-00023-of-000051.safetensors",
+ "model.layers.22.self_attn.param_sink_value": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.gate.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.e_score_correction_bias": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.0.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.0.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.1.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.1.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.2.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.2.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.3.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.3.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.4.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.4.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.5.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.5.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.6.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.6.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.7.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.7.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.8.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.8.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.9.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.9.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.10.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.10.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.11.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.11.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.12.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.12.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.13.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.13.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.14.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.14.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.15.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.15.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.16.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.16.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.17.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.17.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.18.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.18.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.19.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.19.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.20.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.20.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.21.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.21.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.22.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.22.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.23.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.23.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.24.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.24.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.25.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.25.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.26.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.26.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.27.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.27.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.28.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.28.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.29.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.29.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.30.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.30.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.31.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.31.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.32.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.32.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.33.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.33.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.34.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.34.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.35.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.35.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.36.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.36.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.37.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.37.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.38.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.38.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.39.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.39.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.40.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.40.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.41.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.41.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.42.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.42.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.43.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.43.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.44.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.44.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.45.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.45.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.46.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.46.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.47.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.47.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.48.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.48.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.49.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.49.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.50.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.50.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.51.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.51.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.52.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.52.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.53.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.53.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.54.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.54.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.55.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.55.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.56.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.56.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.57.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.57.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.58.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.58.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.59.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.59.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.60.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.60.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.61.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.61.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.62.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.62.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.63.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.63.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.64.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.64.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.64.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.65.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.65.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.65.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.66.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.66.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.66.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.67.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.67.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.67.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.68.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.68.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.68.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.69.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.69.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.69.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.70.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.70.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.70.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.71.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.71.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.71.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.72.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.72.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.72.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.73.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.73.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.73.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.74.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.74.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.74.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.75.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.75.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.75.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.76.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.76.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.76.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.77.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.77.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.77.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.78.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.78.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.78.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.79.gate_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.79.up_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.22.mlp.experts.79.down_proj.weight": "model-00023-of-000051.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.pre_mlp_layernorm.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.post_mlp_layernorm.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.self_attn.qkv_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.self_attn.k_layernorm.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.self_attn.param_sink_key": "model-00024-of-000051.safetensors",
+ "model.layers.23.self_attn.param_sink_value": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.gate.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.e_score_correction_bias": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.0.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.0.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.1.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.1.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.2.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.2.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.3.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.3.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.4.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.4.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.5.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.5.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.6.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.6.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.7.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.7.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.8.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.8.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.9.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.9.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.10.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.10.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.11.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.11.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.12.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.12.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.13.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.13.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.14.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.14.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.15.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.15.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.16.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.16.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.17.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.17.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.18.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.18.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.19.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.19.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.20.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.20.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.21.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.21.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.22.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.22.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.23.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.23.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.24.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.24.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.25.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.25.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.26.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.26.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.27.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.27.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.28.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.28.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.29.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.29.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.30.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.30.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.31.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.31.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.32.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.32.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.33.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.33.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.34.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.34.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.35.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.35.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.36.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.36.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.37.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.37.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.38.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.38.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.39.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.39.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.40.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.40.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.41.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.41.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.42.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.42.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.43.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.43.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.44.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.44.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.45.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.45.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.46.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.46.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.47.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.47.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.48.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.48.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.49.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.49.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.50.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.50.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.51.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.51.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.52.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.52.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.53.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.53.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.54.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.54.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.55.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.55.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.56.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.56.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.57.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.57.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.58.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.58.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.59.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.59.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.60.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.60.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.61.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.61.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.62.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.62.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.63.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.63.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.64.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.64.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.64.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.65.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.65.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.65.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.66.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.66.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.66.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.67.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.67.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.67.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.68.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.68.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.68.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.69.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.69.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.69.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.70.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.70.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.71.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.71.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.72.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.72.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.72.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.73.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.73.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.73.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.74.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.74.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.74.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.75.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.75.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.75.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.76.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.76.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.76.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.77.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.77.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.77.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.78.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.78.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.78.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.79.gate_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.79.up_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.23.mlp.experts.79.down_proj.weight": "model-00024-of-000051.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.pre_mlp_layernorm.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.post_mlp_layernorm.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.self_attn.qkv_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.self_attn.k_layernorm.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.self_attn.param_sink_key": "model-00025-of-000051.safetensors",
+ "model.layers.24.self_attn.param_sink_value": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.gate.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.e_score_correction_bias": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.0.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.0.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.1.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.1.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.2.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.2.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.3.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.3.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.4.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.4.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.5.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.5.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.6.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.6.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.7.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.7.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.8.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.8.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.9.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.9.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.10.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.10.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.11.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.11.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.12.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.12.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.13.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.13.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.14.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.14.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.15.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.15.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.16.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.16.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.17.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.17.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.18.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.18.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.19.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.19.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.20.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.20.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.21.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.21.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.22.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.22.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.23.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.23.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.24.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.24.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.25.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.25.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.26.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.26.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.27.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.27.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.28.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.28.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.29.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.29.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.30.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.30.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.31.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.31.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.32.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.32.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.33.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.33.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.34.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.34.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.35.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.35.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.36.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.36.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.37.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.37.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.38.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.38.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.39.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.39.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.40.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.40.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.41.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.41.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.42.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.42.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.43.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.43.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.44.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.44.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.45.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.45.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.46.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.46.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.47.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.47.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.48.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.48.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.49.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.49.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.50.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.50.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.51.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.51.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.52.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.52.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.53.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.53.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.54.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.54.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.55.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.55.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.56.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.56.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.57.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.57.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.58.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.58.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.59.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.59.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.60.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.60.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.61.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.61.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.62.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.62.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.63.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.63.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.64.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.64.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.64.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.65.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.65.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.65.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.66.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.66.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.66.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.67.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.67.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.67.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.68.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.68.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.68.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.69.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.69.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.69.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.70.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.70.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.70.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.71.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.71.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.71.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.72.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.72.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.72.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.73.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.73.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.73.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.74.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.74.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.74.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.75.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.75.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.75.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.76.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.76.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.76.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.77.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.77.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.77.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.78.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.78.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.78.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.79.gate_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.79.up_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.24.mlp.experts.79.down_proj.weight": "model-00025-of-000051.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.pre_mlp_layernorm.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.post_mlp_layernorm.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.self_attn.qkv_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.self_attn.k_layernorm.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.self_attn.param_sink_key": "model-00026-of-000051.safetensors",
+ "model.layers.25.self_attn.param_sink_value": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.gate.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.e_score_correction_bias": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.0.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.0.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.1.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.1.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.2.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.2.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.3.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.3.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.4.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.4.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.5.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.5.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.6.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.6.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.7.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.7.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.8.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.8.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.9.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.9.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.10.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.10.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.11.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.11.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.12.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.12.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.13.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.13.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.14.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.14.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.15.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.15.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.16.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.16.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.17.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.17.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.18.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.18.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.19.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.19.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.20.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.20.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.21.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.21.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.22.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.22.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.23.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.23.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.24.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.24.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.25.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.25.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.26.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.26.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.27.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.27.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.28.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.28.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.29.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.29.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.30.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.30.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.31.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.31.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.32.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.32.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.33.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.33.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.34.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.34.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.35.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.35.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.36.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.36.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.37.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.37.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.38.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.38.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.39.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.39.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.40.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.40.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.41.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.41.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.42.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.42.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.43.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.43.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.44.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.44.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.45.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.45.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.46.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.46.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.47.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.47.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.48.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.48.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.49.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.49.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.50.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.50.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.51.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.51.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.52.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.52.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.53.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.53.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.54.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.54.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.55.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.55.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.56.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.56.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.57.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.57.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.58.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.58.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.59.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.59.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.60.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.60.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.61.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.61.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.62.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.62.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.63.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.63.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.64.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.64.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.64.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.65.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.65.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.65.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.66.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.66.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.66.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.67.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.67.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.67.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.68.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.68.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.68.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.69.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.69.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.69.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.70.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.70.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.70.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.71.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.71.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.71.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.72.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.72.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.72.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.73.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.73.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.73.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.74.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.74.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.74.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.75.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.75.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.75.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.76.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.76.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.76.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.77.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.77.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.77.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.78.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.78.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.78.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.79.gate_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.79.up_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.25.mlp.experts.79.down_proj.weight": "model-00026-of-000051.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.pre_mlp_layernorm.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.post_mlp_layernorm.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.self_attn.qkv_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.self_attn.k_layernorm.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.self_attn.param_sink_key": "model-00027-of-000051.safetensors",
+ "model.layers.26.self_attn.param_sink_value": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.gate.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.e_score_correction_bias": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.0.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.0.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.1.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.1.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.2.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.2.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.3.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.3.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.4.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.4.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.5.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.5.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.6.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.6.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.7.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.7.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.8.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.8.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.9.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.9.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.10.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.10.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.11.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.11.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.12.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.12.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.13.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.13.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.14.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.14.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.15.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.15.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.16.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.16.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.17.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.17.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.18.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.18.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.19.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.19.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.20.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.20.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.21.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.21.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.22.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.22.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.23.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.23.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.24.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.24.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.25.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.25.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.26.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.26.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.27.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.27.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.28.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.28.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.29.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.29.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.30.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.30.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.31.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.31.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.32.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.32.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.33.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.33.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.34.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.34.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.35.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.35.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.36.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.36.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.37.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.37.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.38.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.38.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.39.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.39.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.40.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.40.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.41.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.41.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.42.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.42.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.43.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.43.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.44.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.44.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.45.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.45.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.46.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.46.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.47.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.47.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.48.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.48.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.49.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.49.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.50.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.50.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.51.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.51.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.52.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.52.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.53.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.53.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.54.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.54.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.55.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.55.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.56.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.56.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.57.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.57.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.58.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.58.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.59.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.59.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.60.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.60.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.61.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.61.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.62.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.62.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.63.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.63.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.64.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.64.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.64.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.65.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.65.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.65.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.66.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.66.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.66.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.67.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.67.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.67.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.68.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.68.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.68.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.69.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.69.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.69.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.70.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.70.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.70.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.71.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.71.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.71.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.72.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.72.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.72.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.73.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.73.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.73.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.74.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.74.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.74.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.75.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.75.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.75.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.76.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.76.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.76.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.77.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.77.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.77.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.78.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.78.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.78.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.79.gate_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.79.up_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.26.mlp.experts.79.down_proj.weight": "model-00027-of-000051.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.pre_mlp_layernorm.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.post_mlp_layernorm.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.self_attn.qkv_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.self_attn.k_layernorm.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.self_attn.param_sink_key": "model-00028-of-000051.safetensors",
+ "model.layers.27.self_attn.param_sink_value": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.gate.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.e_score_correction_bias": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.0.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.0.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.1.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.1.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.2.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.2.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.3.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.3.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.4.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.4.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.5.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.5.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.6.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.6.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.7.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.7.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.8.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.8.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.9.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.9.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.10.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.10.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.11.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.11.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.12.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.12.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.13.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.13.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.14.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.14.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.15.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.15.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.16.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.16.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.17.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.17.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.18.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.18.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.19.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.19.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.20.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.20.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.21.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.21.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.22.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.22.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.23.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.23.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.24.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.24.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.25.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.25.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.26.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.26.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.27.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.27.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.28.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.28.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.29.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.29.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.30.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.30.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.31.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.31.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.32.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.32.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.33.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.33.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.34.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.34.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.35.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.35.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.36.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.36.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.37.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.37.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.38.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.38.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.39.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.39.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.40.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.40.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.41.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.41.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.42.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.42.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.43.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.43.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.44.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.44.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.45.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.45.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.46.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.46.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.47.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.47.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.48.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.48.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.49.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.49.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.50.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.50.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.51.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.51.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.52.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.52.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.53.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.53.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.54.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.54.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.55.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.55.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.56.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.56.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.57.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.57.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.58.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.58.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.59.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.59.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.60.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.60.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.61.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.61.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.62.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.62.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.63.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.63.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.64.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.64.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.64.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.65.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.65.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.65.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.66.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.66.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.66.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.67.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.67.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.67.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.68.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.68.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.68.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.69.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.69.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.69.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.70.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.70.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.70.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.71.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.71.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.72.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.72.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.73.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.73.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.73.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.74.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.74.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.74.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.75.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.75.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.75.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.76.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.76.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.76.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.77.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.77.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.77.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.78.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.78.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.78.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.79.gate_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.79.up_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.27.mlp.experts.79.down_proj.weight": "model-00028-of-000051.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.pre_mlp_layernorm.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.post_mlp_layernorm.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.self_attn.qkv_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.self_attn.k_layernorm.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.self_attn.param_sink_key": "model-00029-of-000051.safetensors",
+ "model.layers.28.self_attn.param_sink_value": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.gate.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.e_score_correction_bias": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.0.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.0.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.1.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.1.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.2.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.2.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.3.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.3.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.4.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.4.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.5.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.5.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.6.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.6.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.7.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.7.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.8.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.8.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.9.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.9.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.10.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.10.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.11.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.11.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.12.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.12.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.13.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.13.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.14.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.14.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.15.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.15.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.16.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.16.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.17.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.17.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.18.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.18.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.19.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.19.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.20.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.20.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.21.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.21.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.22.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.22.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.23.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.23.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.24.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.24.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.25.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.25.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.25.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.26.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.26.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.27.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.27.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.28.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.28.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.29.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.29.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.30.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.30.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.31.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.31.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.32.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.32.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.33.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.33.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.34.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.34.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.35.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.35.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.36.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.36.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.37.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.37.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.38.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.38.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.39.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.39.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.40.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.40.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.41.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.41.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.42.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.42.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.43.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.43.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.44.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.44.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.45.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.45.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.46.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.46.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.47.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.47.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.47.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.48.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.48.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.49.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.49.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.50.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.50.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.51.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.51.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.52.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.52.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.53.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.53.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.54.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.54.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.55.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.55.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.56.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.56.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.57.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.57.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.58.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.58.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.59.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.59.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.60.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.60.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.61.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.61.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.62.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.62.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.63.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.63.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.64.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.64.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.64.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.65.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.65.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.65.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.66.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.66.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.66.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.67.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.67.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.67.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.68.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.68.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.68.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.69.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.69.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.69.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.70.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.70.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.70.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.71.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.71.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.71.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.72.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.72.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.72.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.73.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.73.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.73.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.74.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.74.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.74.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.75.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.75.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.75.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.76.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.76.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.76.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.77.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.77.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.77.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.78.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.78.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.78.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.79.gate_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.79.up_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.28.mlp.experts.79.down_proj.weight": "model-00029-of-000051.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.pre_mlp_layernorm.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.post_mlp_layernorm.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.self_attn.qkv_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.self_attn.k_layernorm.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.self_attn.param_sink_key": "model-00030-of-000051.safetensors",
+ "model.layers.29.self_attn.param_sink_value": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.gate.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.e_score_correction_bias": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.0.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.0.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.1.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.1.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.2.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.2.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.3.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.3.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.4.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.4.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.5.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.5.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.6.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.6.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.7.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.7.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.7.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.8.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.8.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.9.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.9.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.10.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.10.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.11.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.11.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.12.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.12.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.13.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.13.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.14.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.14.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.15.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.15.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.16.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.16.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.17.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.17.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.18.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.18.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.19.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.19.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.20.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.20.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.21.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.21.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.22.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.22.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.22.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.23.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.23.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.24.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.24.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.25.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.25.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.26.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.26.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.27.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.27.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.28.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.28.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.29.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.29.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.29.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.30.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.30.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.31.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.31.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.32.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.32.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.33.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.33.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.34.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.34.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.35.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.35.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.36.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.36.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.37.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.37.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.37.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.38.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.38.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.39.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.39.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.40.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.40.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.41.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.41.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.42.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.42.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.43.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.43.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.44.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.44.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.45.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.45.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.46.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.46.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.47.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.47.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.48.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.48.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.49.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.49.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.50.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.50.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.51.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.51.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.51.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.52.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.52.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.52.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.53.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.53.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.54.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.54.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.55.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.55.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.56.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.56.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.57.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.57.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.58.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.58.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.59.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.59.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.60.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.60.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.61.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.61.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.62.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.62.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.63.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.63.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.64.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.64.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.64.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.65.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.65.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.65.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.66.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.66.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.66.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.67.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.67.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.67.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.68.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.68.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.68.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.69.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.69.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.69.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.70.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.70.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.70.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.71.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.71.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.71.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.72.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.72.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.72.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.73.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.73.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.73.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.74.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.74.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.74.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.75.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.75.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.75.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.76.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.76.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.76.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.77.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.77.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.77.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.78.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.78.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.78.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.79.gate_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.79.up_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.29.mlp.experts.79.down_proj.weight": "model-00030-of-000051.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.pre_mlp_layernorm.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.post_mlp_layernorm.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.self_attn.qkv_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.self_attn.k_layernorm.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.self_attn.param_sink_key": "model-00031-of-000051.safetensors",
+ "model.layers.30.self_attn.param_sink_value": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.gate.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.e_score_correction_bias": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.0.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.0.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.1.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.1.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.2.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.2.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.3.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.3.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.4.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.4.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.5.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.5.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.6.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.6.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.7.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.7.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.8.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.8.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.9.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.9.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.10.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.10.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.11.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.11.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.12.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.12.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.13.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.13.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.14.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.14.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.15.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.15.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.16.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.16.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.17.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.17.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.18.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.18.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.18.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.19.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.19.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.20.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.20.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.21.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.21.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.22.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.22.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.23.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.23.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.24.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.24.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.25.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.25.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.26.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.26.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.27.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.27.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.28.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.28.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.29.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.29.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.30.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.30.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.31.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.31.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.32.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.32.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.33.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.33.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.34.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.34.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.35.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.35.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.36.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.36.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.37.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.37.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.38.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.38.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.39.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.39.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.40.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.40.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.40.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.41.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.41.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.42.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.42.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.43.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.43.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.44.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.44.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.45.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.45.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.46.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.46.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.47.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.47.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.48.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.48.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.49.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.49.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.50.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.50.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.51.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.51.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.52.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.52.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.53.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.53.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.54.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.54.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.55.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.55.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.56.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.56.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.57.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.57.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.58.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.58.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.59.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.59.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.60.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.60.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.61.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.61.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.62.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.62.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.62.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.63.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.63.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.64.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.64.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.64.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.65.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.65.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.65.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.66.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.66.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.66.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.67.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.67.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.67.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.68.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.68.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.68.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.69.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.69.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.69.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.70.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.70.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.70.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.71.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.71.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.71.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.72.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.72.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.72.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.73.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.73.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.73.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.74.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.74.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.74.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.75.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.75.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.75.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.76.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.76.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.76.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.77.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.77.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.77.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.78.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.78.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.78.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.79.gate_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.79.up_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.30.mlp.experts.79.down_proj.weight": "model-00031-of-000051.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.pre_mlp_layernorm.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.post_mlp_layernorm.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.self_attn.qkv_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.self_attn.k_layernorm.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.self_attn.param_sink_key": "model-00032-of-000051.safetensors",
+ "model.layers.31.self_attn.param_sink_value": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.gate.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.e_score_correction_bias": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.0.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.0.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.1.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.1.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.2.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.2.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.3.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.3.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.4.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.4.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.5.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.5.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.6.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.6.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.7.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.7.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.7.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.8.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.8.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.9.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.9.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.10.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.10.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.11.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.11.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.12.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.12.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.13.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.13.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.14.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.14.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.15.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.15.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.16.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.16.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.17.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.17.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.18.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.18.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.19.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.19.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.20.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.20.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.21.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.21.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.22.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.22.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.22.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.23.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.23.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.24.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.24.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.25.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.25.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.26.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.26.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.27.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.27.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.28.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.28.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.29.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.29.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.30.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.30.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.31.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.31.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.32.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.32.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.33.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.33.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.34.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.34.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.35.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.35.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.36.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.36.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.37.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.37.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.37.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.38.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.38.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.39.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.39.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.40.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.40.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.41.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.41.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.42.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.42.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.43.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.43.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.44.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.44.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.44.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.45.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.45.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.46.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.46.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.47.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.47.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.48.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.48.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.49.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.49.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.50.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.50.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.51.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.51.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.52.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.52.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.52.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.53.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.53.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.54.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.54.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.55.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.55.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.56.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.56.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.57.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.57.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.58.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.58.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.59.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.59.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.60.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.60.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.61.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.61.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.62.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.62.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.63.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.63.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.64.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.64.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.64.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.65.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.65.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.65.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.66.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.66.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.66.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.67.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.67.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.67.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.68.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.68.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.68.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.69.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.69.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.69.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.70.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.70.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.70.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.71.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.71.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.72.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.72.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.73.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.73.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.73.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.74.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.74.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.74.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.75.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.75.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.75.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.76.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.76.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.76.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.77.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.77.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.77.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.78.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.78.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.78.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.79.gate_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.79.up_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.31.mlp.experts.79.down_proj.weight": "model-00032-of-000051.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.pre_mlp_layernorm.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.post_mlp_layernorm.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.self_attn.qkv_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.self_attn.k_layernorm.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.self_attn.param_sink_key": "model-00033-of-000051.safetensors",
+ "model.layers.32.self_attn.param_sink_value": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.gate.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.e_score_correction_bias": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.0.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.0.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.1.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.1.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.2.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.2.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.3.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.3.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.4.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.4.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.5.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.5.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.6.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.6.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.7.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.7.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.8.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.8.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.9.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.9.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.10.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.10.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.11.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.11.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.12.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.12.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.13.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.13.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.14.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.14.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.15.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.15.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.16.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.16.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.17.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.17.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.18.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.18.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.19.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.19.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.20.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.20.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.21.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.21.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.22.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.22.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.23.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.23.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.24.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.24.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.25.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.25.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.26.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.26.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.27.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.27.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.28.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.28.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.29.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.29.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.30.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.30.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.31.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.31.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.32.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.32.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.33.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.33.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.33.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.34.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.34.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.35.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.35.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.36.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.36.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.37.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.37.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.38.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.38.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.39.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.39.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.40.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.40.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.41.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.41.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.42.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.42.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.43.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.43.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.44.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.44.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.45.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.45.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.46.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.46.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.47.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.47.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.48.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.48.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.49.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.49.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.50.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.50.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.51.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.51.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.52.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.52.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.53.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.53.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.54.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.54.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.55.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.55.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.55.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.56.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.56.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.57.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.57.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.58.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.58.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.59.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.59.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.60.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.60.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.61.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.61.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.62.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.62.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.63.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.63.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.64.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.64.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.64.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.65.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.65.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.65.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.66.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.66.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.66.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.67.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.67.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.67.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.68.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.68.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.68.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.69.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.69.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.69.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.70.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.70.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.70.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.71.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.71.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.71.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.72.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.72.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.72.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.73.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.73.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.73.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.74.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.74.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.74.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.75.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.75.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.75.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.76.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.76.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.76.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.77.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.77.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.77.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.78.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.78.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.78.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.79.gate_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.79.up_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.32.mlp.experts.79.down_proj.weight": "model-00033-of-000051.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.pre_mlp_layernorm.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.post_mlp_layernorm.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.self_attn.qkv_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.self_attn.k_layernorm.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.self_attn.param_sink_key": "model-00034-of-000051.safetensors",
+ "model.layers.33.self_attn.param_sink_value": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.gate.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.e_score_correction_bias": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.0.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.0.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.1.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.1.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.2.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.2.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.3.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.3.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.4.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.4.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.5.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.5.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.6.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.6.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.7.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.7.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.8.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.8.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.9.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.9.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.10.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.10.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.11.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.11.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.12.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.12.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.13.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.13.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.14.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.14.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.15.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.15.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.16.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.16.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.17.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.17.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.18.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.18.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.19.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.19.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.20.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.20.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.21.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.21.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.22.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.22.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.22.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.23.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.23.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.24.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.24.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.25.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.25.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.26.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.26.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.27.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.27.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.28.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.28.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.29.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.29.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.30.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.30.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.31.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.31.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.32.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.32.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.33.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.33.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.34.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.34.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.35.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.35.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.36.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.36.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.37.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.37.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.37.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.38.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.38.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.39.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.39.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.40.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.40.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.41.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.41.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.42.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.42.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.43.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.43.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.44.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.44.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.44.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.45.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.45.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.46.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.46.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.47.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.47.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.48.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.48.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.49.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.49.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.50.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.50.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.51.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.51.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.52.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.52.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.52.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.53.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.53.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.54.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.54.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.55.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.55.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.56.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.56.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.57.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.57.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.58.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.58.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.59.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.59.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.60.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.60.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.61.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.61.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.62.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.62.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.63.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.63.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.64.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.64.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.64.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.65.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.65.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.65.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.66.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.66.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.66.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.67.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.67.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.67.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.68.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.68.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.68.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.69.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.69.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.69.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.70.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.70.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.70.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.71.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.71.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.71.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.72.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.72.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.72.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.73.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.73.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.73.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.74.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.74.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.74.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.75.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.75.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.75.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.76.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.76.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.76.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.77.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.77.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.77.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.78.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.78.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.78.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.79.gate_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.79.up_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.33.mlp.experts.79.down_proj.weight": "model-00034-of-000051.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.pre_mlp_layernorm.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.post_mlp_layernorm.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.self_attn.qkv_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.self_attn.k_layernorm.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.self_attn.param_sink_key": "model-00035-of-000051.safetensors",
+ "model.layers.34.self_attn.param_sink_value": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.gate.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.e_score_correction_bias": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.0.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.0.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.1.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.1.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.2.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.2.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.3.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.3.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.4.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.4.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.5.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.5.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.6.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.6.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.7.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.7.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.8.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.8.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.9.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.9.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.10.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.10.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.11.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.11.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.12.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.12.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.13.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.13.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.14.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.14.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.15.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.15.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.16.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.16.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.17.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.17.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.18.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.18.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.19.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.19.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.20.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.20.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.21.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.21.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.22.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.22.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.23.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.23.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.24.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.24.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.25.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.25.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.26.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.26.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.27.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.27.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.28.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.28.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.29.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.29.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.30.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.30.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.31.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.31.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.32.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.32.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.33.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.33.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.33.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.34.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.34.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.35.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.35.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.36.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.36.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.37.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.37.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.38.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.38.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.39.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.39.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.40.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.40.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.41.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.41.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.42.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.42.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.43.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.43.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.44.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.44.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.45.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.45.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.46.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.46.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.47.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.47.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.48.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.48.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.49.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.49.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.50.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.50.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.51.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.51.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.52.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.52.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.53.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.53.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.54.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.54.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.55.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.55.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.55.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.56.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.56.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.57.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.57.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.58.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.58.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.59.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.59.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.60.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.60.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.61.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.61.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.62.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.62.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.63.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.63.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.64.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.64.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.64.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.65.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.65.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.65.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.66.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.66.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.66.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.67.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.67.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.67.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.68.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.68.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.68.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.69.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.69.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.69.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.70.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.70.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.70.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.71.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.71.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.71.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.72.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.72.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.72.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.73.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.73.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.73.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.74.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.74.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.74.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.75.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.75.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.75.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.76.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.76.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.76.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.77.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.77.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.77.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.78.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.78.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.78.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.79.gate_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.79.up_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.34.mlp.experts.79.down_proj.weight": "model-00035-of-000051.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.pre_mlp_layernorm.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.post_mlp_layernorm.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.self_attn.qkv_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.self_attn.k_layernorm.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.self_attn.param_sink_key": "model-00036-of-000051.safetensors",
+ "model.layers.35.self_attn.param_sink_value": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.gate.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.e_score_correction_bias": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.0.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.0.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.1.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.1.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.2.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.2.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.3.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.3.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.4.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.4.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.5.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.5.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.6.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.6.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.7.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.7.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.8.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.8.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.9.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.9.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.10.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.10.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.11.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.11.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.12.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.12.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.13.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.13.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.14.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.14.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.15.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.15.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.16.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.16.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.17.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.17.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.18.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.18.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.19.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.19.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.20.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.20.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.21.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.21.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.22.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.22.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.22.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.23.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.23.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.24.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.24.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.25.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.25.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.26.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.26.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.27.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.27.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.28.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.28.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.29.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.29.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.30.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.30.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.31.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.31.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.32.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.32.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.33.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.33.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.34.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.34.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.35.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.35.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.36.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.36.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.37.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.37.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.37.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.38.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.38.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.39.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.39.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.40.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.40.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.41.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.41.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.42.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.42.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.43.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.43.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.44.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.44.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.44.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.45.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.45.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.46.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.46.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.47.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.47.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.48.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.48.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.49.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.49.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.50.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.50.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.51.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.51.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.52.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.52.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.52.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.53.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.53.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.54.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.54.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.55.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.55.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.56.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.56.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.57.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.57.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.58.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.58.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.59.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.59.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.60.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.60.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.61.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.61.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.62.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.62.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.63.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.63.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.64.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.64.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.64.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.65.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.65.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.65.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.66.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.66.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.66.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.67.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.67.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.67.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.68.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.68.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.68.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.69.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.69.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.69.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.70.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.70.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.70.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.71.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.71.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.71.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.72.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.72.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.72.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.73.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.73.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.74.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.74.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.75.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.75.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.76.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.76.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.76.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.77.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.77.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.77.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.78.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.78.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.78.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.79.gate_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.79.up_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.35.mlp.experts.79.down_proj.weight": "model-00036-of-000051.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.pre_mlp_layernorm.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.post_mlp_layernorm.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.self_attn.qkv_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.self_attn.k_layernorm.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.self_attn.param_sink_key": "model-00037-of-000051.safetensors",
+ "model.layers.36.self_attn.param_sink_value": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.gate.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.e_score_correction_bias": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.0.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.0.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.1.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.1.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.2.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.2.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.3.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.3.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.4.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.4.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.5.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.5.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.6.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.6.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.7.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.7.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.8.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.8.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.9.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.9.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.10.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.10.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.11.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.11.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.12.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.12.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.13.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.13.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.14.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.14.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.15.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.15.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.16.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.16.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.17.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.17.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.18.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.18.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.19.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.19.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.20.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.20.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.21.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.21.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.22.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.22.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.23.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.23.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.24.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.24.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.25.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.25.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.26.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.26.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.27.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.27.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.28.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.28.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.29.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.29.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.30.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.30.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.31.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.31.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.32.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.32.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.33.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.33.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.33.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.34.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.34.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.35.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.35.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.36.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.36.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.37.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.37.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.38.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.38.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.39.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.39.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.40.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.40.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.41.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.41.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.42.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.42.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.43.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.43.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.44.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.44.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.45.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.45.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.46.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.46.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.47.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.47.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.48.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.48.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.49.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.49.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.50.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.50.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.51.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.51.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.52.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.52.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.53.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.53.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.54.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.54.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.55.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.55.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.55.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.56.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.56.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.57.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.57.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.58.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.58.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.59.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.59.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.60.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.60.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.61.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.61.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.62.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.62.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.63.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.63.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.64.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.64.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.64.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.65.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.65.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.65.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.66.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.66.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.66.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.67.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.67.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.67.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.68.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.68.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.68.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.69.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.69.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.69.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.70.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.70.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.70.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.71.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.71.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.71.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.72.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.72.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.72.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.73.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.73.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.73.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.74.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.74.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.74.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.75.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.75.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.75.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.76.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.76.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.76.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.77.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.77.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.77.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.78.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.78.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.78.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.79.gate_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.79.up_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.36.mlp.experts.79.down_proj.weight": "model-00037-of-000051.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.pre_mlp_layernorm.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.post_mlp_layernorm.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.self_attn.qkv_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.self_attn.k_layernorm.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.self_attn.param_sink_key": "model-00038-of-000051.safetensors",
+ "model.layers.37.self_attn.param_sink_value": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.gate.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.e_score_correction_bias": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.0.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.0.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.1.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.1.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.2.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.2.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.3.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.3.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.4.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.4.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.5.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.5.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.6.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.6.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.7.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.7.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.8.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.8.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.9.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.9.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.10.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.10.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.11.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.11.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.12.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.12.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.13.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.13.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.14.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.14.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.15.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.15.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.16.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.16.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.17.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.17.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.18.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.18.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.19.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.19.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.20.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.20.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.21.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.21.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.22.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.22.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.22.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.23.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.23.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.24.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.24.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.25.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.25.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.26.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.26.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.27.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.27.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.28.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.28.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.29.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.29.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.30.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.30.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.31.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.31.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.32.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.32.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.33.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.33.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.34.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.34.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.35.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.35.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.36.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.36.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.37.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.37.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.37.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.38.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.38.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.39.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.39.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.40.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.40.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.41.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.41.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.42.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.42.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.43.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.43.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.44.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.44.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.44.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.45.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.45.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.46.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.46.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.47.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.47.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.48.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.48.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.49.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.49.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.50.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.50.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.51.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.51.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.52.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.52.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.52.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.53.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.53.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.54.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.54.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.55.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.55.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.56.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.56.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.57.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.57.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.58.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.58.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.59.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.59.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.60.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.60.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.61.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.61.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.62.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.62.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.63.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.63.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.64.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.64.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.64.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.65.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.65.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.65.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.66.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.66.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.66.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.67.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.67.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.67.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.68.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.68.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.68.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.69.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.69.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.69.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.70.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.70.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.70.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.71.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.71.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.71.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.72.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.72.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.72.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.73.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.73.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.73.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.74.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.74.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.74.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.75.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.75.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.75.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.76.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.76.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.76.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.77.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.77.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.77.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.78.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.78.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.78.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.79.gate_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.79.up_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.37.mlp.experts.79.down_proj.weight": "model-00038-of-000051.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.pre_mlp_layernorm.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.post_mlp_layernorm.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.self_attn.qkv_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.self_attn.k_layernorm.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.self_attn.param_sink_key": "model-00039-of-000051.safetensors",
+ "model.layers.38.self_attn.param_sink_value": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.gate.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.e_score_correction_bias": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.0.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.0.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.1.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.1.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.2.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.2.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.3.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.3.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.4.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.4.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.5.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.5.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.6.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.6.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.7.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.7.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.8.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.8.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.9.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.9.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.10.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.10.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.11.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.11.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.12.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.12.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.13.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.13.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.14.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.14.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.15.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.15.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.16.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.16.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.17.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.17.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.18.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.18.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.19.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.19.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.20.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.20.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.21.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.21.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.22.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.22.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.23.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.23.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.24.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.24.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.25.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.25.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.26.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.26.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.27.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.27.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.28.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.28.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.29.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.29.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.30.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.30.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.31.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.31.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.32.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.32.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.33.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.33.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.33.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.34.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.34.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.35.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.35.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.36.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.36.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.37.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.37.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.38.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.38.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.39.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.39.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.40.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.40.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.41.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.41.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.42.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.42.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.43.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.43.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.44.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.44.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.45.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.45.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.46.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.46.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.47.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.47.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.48.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.48.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.49.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.49.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.50.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.50.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.51.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.51.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.52.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.52.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.53.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.53.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.54.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.54.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.55.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.55.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.55.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.56.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.56.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.57.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.57.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.58.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.58.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.59.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.59.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.60.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.60.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.61.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.61.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.62.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.62.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.63.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.63.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.64.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.64.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.64.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.65.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.65.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.65.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.66.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.66.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.66.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.67.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.67.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.67.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.68.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.68.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.68.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.69.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.69.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.69.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.70.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.70.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.70.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.71.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.71.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.71.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.72.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.72.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.72.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.73.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.73.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.73.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.74.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.74.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.74.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.75.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.75.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.75.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.76.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.76.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.76.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.77.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.77.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.77.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.78.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.78.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.78.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.79.gate_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.79.up_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.38.mlp.experts.79.down_proj.weight": "model-00039-of-000051.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.pre_mlp_layernorm.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.post_mlp_layernorm.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.self_attn.qkv_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.self_attn.k_layernorm.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.self_attn.param_sink_key": "model-00040-of-000051.safetensors",
+ "model.layers.39.self_attn.param_sink_value": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.gate.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.e_score_correction_bias": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.0.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.0.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.1.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.1.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.2.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.2.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.3.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.3.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.4.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.4.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.5.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.5.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.6.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.6.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.7.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.7.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.8.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.8.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.9.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.9.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.10.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.10.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.11.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.11.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.12.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.12.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.13.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.13.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.14.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.14.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.15.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.15.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.16.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.16.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.17.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.17.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.18.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.18.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.19.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.19.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.20.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.20.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.21.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.21.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.22.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.22.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.22.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.23.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.23.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.24.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.24.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.25.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.25.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.26.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.26.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.27.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.27.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.28.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.28.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.29.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.29.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.30.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.30.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.31.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.31.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.32.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.32.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.33.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.33.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.34.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.34.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.35.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.35.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.36.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.36.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.37.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.37.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.37.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.38.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.38.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.39.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.39.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.40.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.40.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.41.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.41.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.42.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.42.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.43.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.43.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.44.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.44.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.44.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.45.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.45.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.46.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.46.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.47.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.47.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.48.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.48.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.49.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.49.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.50.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.50.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.51.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.51.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.52.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.52.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.52.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.53.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.53.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.54.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.54.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.55.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.55.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.56.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.56.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.57.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.57.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.58.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.58.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.59.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.59.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.60.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.60.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.61.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.61.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.62.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.62.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.63.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.63.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.64.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.64.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.64.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.65.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.65.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.65.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.66.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.66.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.66.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.67.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.67.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.67.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.68.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.68.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.68.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.69.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.69.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.69.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.70.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.70.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.70.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.71.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.71.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.71.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.72.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.72.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.72.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.73.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.73.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.73.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.74.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.74.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.74.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.75.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.75.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.76.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.76.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.77.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.77.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.77.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.78.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.78.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.78.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.79.gate_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.79.up_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.39.mlp.experts.79.down_proj.weight": "model-00040-of-000051.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.pre_mlp_layernorm.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.post_mlp_layernorm.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.self_attn.qkv_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.self_attn.k_layernorm.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.self_attn.param_sink_key": "model-00041-of-000051.safetensors",
+ "model.layers.40.self_attn.param_sink_value": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.gate.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.e_score_correction_bias": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.0.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.0.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.1.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.1.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.2.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.2.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.3.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.3.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.4.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.4.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.5.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.5.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.6.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.6.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.7.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.7.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.8.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.8.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.9.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.9.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.10.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.10.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.11.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.11.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.12.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.12.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.13.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.13.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.14.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.14.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.15.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.15.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.16.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.16.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.17.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.17.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.18.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.18.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.19.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.19.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.20.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.20.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.21.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.21.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.22.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.22.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.23.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.23.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.24.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.24.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.25.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.25.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.26.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.26.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.27.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.27.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.28.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.28.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.29.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.29.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.30.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.30.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.31.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.31.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.32.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.32.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.33.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.33.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.33.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.34.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.34.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.35.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.35.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.36.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.36.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.37.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.37.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.38.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.38.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.39.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.39.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.40.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.40.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.41.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.41.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.42.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.42.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.43.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.43.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.44.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.44.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.45.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.45.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.46.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.46.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.47.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.47.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.48.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.48.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.49.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.49.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.50.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.50.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.51.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.51.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.52.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.52.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.53.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.53.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.54.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.54.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.55.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.55.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.55.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.56.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.56.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.57.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.57.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.58.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.58.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.59.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.59.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.60.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.60.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.61.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.61.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.62.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.62.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.63.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.63.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.64.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.64.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.64.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.65.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.65.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.65.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.66.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.66.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.66.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.67.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.67.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.67.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.68.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.68.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.68.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.69.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.69.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.69.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.70.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.70.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.70.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.71.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.71.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.71.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.72.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.72.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.72.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.73.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.73.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.73.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.74.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.74.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.74.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.75.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.75.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.75.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.76.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.76.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.76.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.77.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.77.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.77.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.78.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.78.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.78.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.79.gate_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.79.up_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.40.mlp.experts.79.down_proj.weight": "model-00041-of-000051.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.pre_mlp_layernorm.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.post_mlp_layernorm.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.self_attn.qkv_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.self_attn.k_layernorm.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.self_attn.param_sink_key": "model-00042-of-000051.safetensors",
+ "model.layers.41.self_attn.param_sink_value": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.gate.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.e_score_correction_bias": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.0.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.0.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.1.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.1.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.2.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.2.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.3.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.3.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.4.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.4.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.5.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.5.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.6.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.6.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.7.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.7.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.8.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.8.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.9.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.9.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.10.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.10.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.11.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.11.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.12.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.12.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.13.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.13.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.14.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.14.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.15.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.15.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.16.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.16.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.17.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.17.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.18.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.18.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.19.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.19.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.20.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.20.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.21.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.21.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.22.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.22.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.22.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.23.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.23.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.24.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.24.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.25.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.25.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.26.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.26.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.27.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.27.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.28.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.28.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.29.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.29.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.30.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.30.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.31.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.31.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.32.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.32.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.33.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.33.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.34.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.34.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.35.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.35.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.36.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.36.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.37.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.37.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.37.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.38.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.38.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.39.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.39.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.40.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.40.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.41.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.41.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.42.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.42.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.43.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.43.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.44.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.44.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.44.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.45.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.45.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.46.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.46.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.47.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.47.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.48.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.48.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.49.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.49.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.50.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.50.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.51.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.51.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.52.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.52.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.52.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.53.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.53.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.54.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.54.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.55.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.55.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.56.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.56.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.57.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.57.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.58.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.58.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.59.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.59.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.60.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.60.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.61.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.61.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.62.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.62.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.63.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.63.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.64.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.64.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.64.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.65.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.65.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.65.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.66.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.66.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.66.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.67.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.67.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.67.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.68.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.68.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.68.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.69.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.69.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.69.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.70.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.70.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.70.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.71.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.71.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.71.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.72.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.72.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.72.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.73.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.73.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.73.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.74.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.74.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.74.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.75.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.75.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.75.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.76.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.76.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.76.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.77.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.77.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.77.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.78.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.78.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.78.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.79.gate_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.79.up_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.41.mlp.experts.79.down_proj.weight": "model-00042-of-000051.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.pre_mlp_layernorm.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.post_mlp_layernorm.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.self_attn.qkv_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.self_attn.k_layernorm.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.self_attn.param_sink_key": "model-00043-of-000051.safetensors",
+ "model.layers.42.self_attn.param_sink_value": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.gate.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.e_score_correction_bias": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.0.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.0.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.1.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.1.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.2.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.2.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.3.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.3.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.4.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.4.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.5.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.5.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.6.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.6.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.7.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.7.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.8.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.8.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.9.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.9.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.10.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.10.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.11.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.11.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.12.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.12.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.13.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.13.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.14.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.14.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.15.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.15.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.16.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.16.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.17.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.17.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.18.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.18.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.19.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.19.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.20.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.20.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.21.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.21.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.22.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.22.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.23.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.23.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.24.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.24.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.25.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.25.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.26.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.26.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.27.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.27.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.28.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.28.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.29.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.29.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.30.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.30.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.31.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.31.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.32.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.32.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.33.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.33.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.33.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.34.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.34.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.35.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.35.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.36.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.36.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.37.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.37.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.38.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.38.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.39.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.39.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.40.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.40.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.41.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.41.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.42.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.42.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.43.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.43.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.44.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.44.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.45.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.45.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.46.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.46.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.47.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.47.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.48.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.48.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.49.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.49.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.50.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.50.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.51.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.51.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.52.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.52.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.53.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.53.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.54.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.54.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.55.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.55.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.55.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.56.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.56.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.57.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.57.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.58.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.58.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.59.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.59.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.60.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.60.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.61.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.61.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.62.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.62.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.63.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.63.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.64.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.64.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.64.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.65.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.65.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.65.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.66.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.66.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.66.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.67.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.67.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.67.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.68.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.68.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.68.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.69.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.69.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.69.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.70.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.70.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.70.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.71.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.71.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.71.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.72.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.72.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.72.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.73.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.73.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.73.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.74.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.74.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.74.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.75.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.75.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.75.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.76.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.76.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.76.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.77.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.77.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.77.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.78.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.78.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.78.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.79.gate_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.79.up_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.42.mlp.experts.79.down_proj.weight": "model-00043-of-000051.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.pre_mlp_layernorm.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.post_mlp_layernorm.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.self_attn.qkv_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.self_attn.k_layernorm.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.self_attn.param_sink_key": "model-00044-of-000051.safetensors",
+ "model.layers.43.self_attn.param_sink_value": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.gate.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.e_score_correction_bias": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.0.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.0.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.1.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.1.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.2.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.2.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.3.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.3.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.4.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.4.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.5.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.5.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.6.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.6.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.7.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.7.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.8.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.8.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.9.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.9.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.10.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.10.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.11.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.11.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.12.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.12.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.13.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.13.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.14.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.14.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.15.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.15.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.16.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.16.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.17.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.17.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.18.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.18.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.19.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.19.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.20.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.20.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.21.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.21.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.22.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.22.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.22.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.23.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.23.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.24.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.24.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.25.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.25.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.26.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.26.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.27.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.27.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.28.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.28.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.29.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.29.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.30.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.30.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.31.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.31.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.32.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.32.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.33.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.33.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.34.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.34.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.35.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.35.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.36.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.36.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.37.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.37.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.37.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.38.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.38.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.39.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.39.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.40.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.40.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.41.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.41.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.42.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.42.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.43.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.43.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.44.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.44.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.44.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.45.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.45.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.46.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.46.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.47.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.47.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.48.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.48.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.49.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.49.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.50.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.50.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.51.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.51.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.52.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.52.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.52.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.53.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.53.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.54.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.54.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.55.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.55.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.56.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.56.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.57.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.57.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.58.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.58.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.59.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.59.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.60.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.60.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.61.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.61.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.62.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.62.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.63.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.63.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.64.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.64.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.64.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.65.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.65.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.65.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.66.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.66.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.66.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.67.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.67.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.67.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.68.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.68.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.68.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.69.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.69.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.69.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.70.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.70.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.70.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.71.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.71.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.71.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.72.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.72.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.72.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.73.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.73.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.73.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.74.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.74.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.74.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.75.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.75.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.76.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.76.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.77.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.77.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.77.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.78.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.78.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.78.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.79.gate_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.79.up_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.43.mlp.experts.79.down_proj.weight": "model-00044-of-000051.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.pre_mlp_layernorm.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.post_mlp_layernorm.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.self_attn.qkv_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.self_attn.k_layernorm.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.self_attn.param_sink_key": "model-00045-of-000051.safetensors",
+ "model.layers.44.self_attn.param_sink_value": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.gate.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.e_score_correction_bias": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.0.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.0.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.1.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.1.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.2.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.2.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.3.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.3.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.4.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.4.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.5.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.5.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.6.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.6.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.7.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.7.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.8.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.8.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.9.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.9.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.10.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.10.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.11.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.11.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.12.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.12.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.13.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.13.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.14.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.14.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.15.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.15.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.16.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.16.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.17.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.17.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.18.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.18.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.19.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.19.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.20.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.20.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.21.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.21.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.22.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.22.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.23.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.23.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.24.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.24.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.25.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.25.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.26.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.26.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.27.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.27.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.28.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.28.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.29.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.29.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.30.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.30.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.31.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.31.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.32.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.32.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.33.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.33.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.33.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.34.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.34.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.35.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.35.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.36.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.36.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.37.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.37.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.38.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.38.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.39.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.39.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.40.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.40.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.41.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.41.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.42.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.42.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.43.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.43.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.44.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.44.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.45.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.45.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.46.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.46.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.47.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.47.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.48.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.48.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.49.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.49.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.50.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.50.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.51.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.51.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.52.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.52.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.53.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.53.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.54.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.54.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.55.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.55.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.55.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.56.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.56.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.57.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.57.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.58.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.58.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.59.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.59.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.60.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.60.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.61.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.61.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.62.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.62.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.63.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.63.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.64.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.64.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.64.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.65.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.65.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.65.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.66.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.66.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.66.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.67.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.67.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.67.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.68.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.68.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.68.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.69.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.69.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.69.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.70.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.70.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.70.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.71.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.71.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.71.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.72.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.72.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.72.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.73.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.73.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.73.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.74.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.74.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.74.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.75.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.75.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.75.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.76.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.76.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.76.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.77.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.77.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.77.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.78.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.78.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.78.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.79.gate_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.79.up_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.44.mlp.experts.79.down_proj.weight": "model-00045-of-000051.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.pre_mlp_layernorm.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.post_mlp_layernorm.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.self_attn.qkv_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.self_attn.k_layernorm.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.self_attn.param_sink_key": "model-00046-of-000051.safetensors",
+ "model.layers.45.self_attn.param_sink_value": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.gate.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.e_score_correction_bias": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.0.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.0.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.1.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.1.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.2.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.2.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.3.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.3.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.4.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.4.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.5.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.5.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.6.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.6.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.7.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.7.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.8.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.8.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.9.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.9.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.10.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.10.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.11.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.11.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.12.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.12.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.13.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.13.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.14.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.14.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.15.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.15.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.16.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.16.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.17.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.17.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.18.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.18.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.19.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.19.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.20.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.20.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.21.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.21.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.22.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.22.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.22.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.23.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.23.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.24.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.24.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.25.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.25.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.26.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.26.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.27.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.27.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.28.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.28.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.29.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.29.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.30.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.30.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.31.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.31.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.32.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.32.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.33.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.33.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.34.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.34.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.35.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.35.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.36.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.36.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.37.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.37.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.37.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.38.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.38.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.39.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.39.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.40.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.40.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.41.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.41.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.42.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.42.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.43.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.43.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.44.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.44.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.44.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.45.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.45.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.46.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.46.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.47.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.47.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.48.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.48.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.49.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.49.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.50.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.50.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.51.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.51.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.52.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.52.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.52.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.53.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.53.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.54.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.54.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.55.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.55.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.56.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.56.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.57.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.57.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.58.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.58.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.59.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.59.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.60.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.60.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.61.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.61.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.62.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.62.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.63.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.63.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.64.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.64.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.64.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.65.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.65.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.65.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.66.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.66.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.66.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.67.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.67.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.67.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.68.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.68.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.68.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.69.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.69.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.69.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.70.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.70.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.70.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.71.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.71.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.71.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.72.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.72.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.72.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.73.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.73.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.73.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.74.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.74.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.74.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.75.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.75.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.75.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.76.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.76.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.76.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.77.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.77.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.77.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.78.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.78.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.78.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.79.gate_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.79.up_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.45.mlp.experts.79.down_proj.weight": "model-00046-of-000051.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.pre_mlp_layernorm.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.post_mlp_layernorm.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.self_attn.qkv_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.self_attn.k_layernorm.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.self_attn.param_sink_key": "model-00047-of-000051.safetensors",
+ "model.layers.46.self_attn.param_sink_value": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.gate.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.e_score_correction_bias": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.0.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.0.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.1.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.1.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.2.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.2.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.3.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.3.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.4.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.4.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.5.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.5.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.6.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.6.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.7.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.7.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.8.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.8.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.9.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.9.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.10.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.10.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.11.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.11.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.12.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.12.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.13.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.13.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.14.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.14.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.15.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.15.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.16.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.16.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.17.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.17.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.18.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.18.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.19.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.19.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.20.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.20.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.21.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.21.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.22.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.22.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.23.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.23.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.24.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.24.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.25.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.25.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.26.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.26.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.27.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.27.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.28.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.28.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.29.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.29.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.30.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.30.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.31.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.31.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.32.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.32.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.33.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.33.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.34.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.34.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.35.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.35.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.36.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.36.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.37.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.37.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.38.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.38.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.39.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.39.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.40.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.40.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.41.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.41.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.42.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.42.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.43.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.43.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.44.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.44.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.45.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.45.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.46.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.46.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.47.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.47.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.48.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.48.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.49.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.49.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.50.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.50.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.51.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.51.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.52.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.52.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.53.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.53.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.54.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.54.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.55.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.55.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.56.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.56.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.57.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.57.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.58.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.58.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.59.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.59.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.60.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.60.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.61.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.61.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.62.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.62.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.63.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.63.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.64.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.64.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.64.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.65.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.65.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.65.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.66.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.66.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.66.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.67.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.67.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.67.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.68.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.68.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.68.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.69.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.69.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.69.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.70.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.70.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.70.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.71.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.71.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.71.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.72.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.72.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.72.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.73.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.73.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.73.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.74.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.74.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.74.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.75.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.75.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.75.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.76.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.76.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.76.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.77.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.77.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.77.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.78.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.78.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.78.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.79.gate_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.79.up_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.46.mlp.experts.79.down_proj.weight": "model-00047-of-000051.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.pre_mlp_layernorm.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.post_mlp_layernorm.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.self_attn.qkv_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.self_attn.k_layernorm.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.self_attn.param_sink_key": "model-00048-of-000051.safetensors",
+ "model.layers.47.self_attn.param_sink_value": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.gate.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.e_score_correction_bias": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.0.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.0.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.1.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.1.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.2.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.2.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.3.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.3.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.4.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.4.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.5.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.5.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.6.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.6.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.7.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.7.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.8.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.8.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.9.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.9.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.10.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.10.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.11.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.11.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.12.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.12.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.13.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.13.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.14.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.14.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.15.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.15.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.16.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.16.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.17.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.17.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.18.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.18.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.19.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.19.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.20.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.20.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.21.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.21.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.22.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.22.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.22.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.23.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.23.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.24.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.24.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.25.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.25.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.26.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.26.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.27.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.27.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.28.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.28.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.29.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.29.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.30.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.30.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.31.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.31.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.32.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.32.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.33.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.33.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.34.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.34.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.35.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.35.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.36.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.36.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.37.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.37.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.37.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.38.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.38.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.39.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.39.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.40.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.40.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.41.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.41.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.42.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.42.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.43.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.43.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.44.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.44.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.44.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.45.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.45.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.46.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.46.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.47.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.47.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.48.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.48.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.49.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.49.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.50.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.50.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.51.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.51.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.52.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.52.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.52.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.53.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.53.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.54.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.54.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.55.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.55.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.56.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.56.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.57.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.57.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.58.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.58.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.59.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.59.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.60.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.60.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.61.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.61.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.62.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.62.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.63.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.63.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.64.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.64.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.64.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.65.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.65.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.65.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.66.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.66.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.66.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.67.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.67.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.67.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.68.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.68.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.68.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.69.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.69.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.69.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.70.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.70.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.70.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.71.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.71.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.71.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.72.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.72.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.72.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.73.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.73.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.73.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.74.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.74.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.74.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.75.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.75.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.75.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.76.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.76.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.76.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.77.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.77.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.78.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.78.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.79.up_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.47.mlp.experts.79.down_proj.weight": "model-00048-of-000051.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.pre_mlp_layernorm.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.post_mlp_layernorm.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.self_attn.qkv_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.self_attn.k_layernorm.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.self_attn.param_sink_key": "model-00049-of-000051.safetensors",
+ "model.layers.48.self_attn.param_sink_value": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.gate.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.e_score_correction_bias": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.shared_experts.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.shared_experts.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.shared_experts.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.0.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.0.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.0.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.1.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.1.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.1.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.2.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.2.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.2.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.3.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.3.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.3.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.4.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.4.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.4.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.5.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.5.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.5.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.6.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.6.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.6.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.7.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.7.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.7.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.8.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.8.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.8.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.9.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.9.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.9.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.10.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.10.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.10.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.11.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.11.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.11.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.12.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.12.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.12.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.13.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.13.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.13.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.14.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.14.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.14.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.15.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.15.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.15.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.16.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.16.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.16.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.17.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.17.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.17.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.18.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.18.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.18.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.19.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.19.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.19.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.20.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.20.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.20.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.21.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.21.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.21.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.22.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.22.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.22.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.23.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.23.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.23.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.24.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.24.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.24.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.25.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.25.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.25.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.26.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.26.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.26.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.27.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.27.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.27.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.28.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.28.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.28.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.29.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.29.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.29.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.30.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.30.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.30.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.31.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.31.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.31.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.32.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.32.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.32.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.33.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.33.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.33.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.34.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.34.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.34.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.35.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.35.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.35.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.36.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.36.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.36.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.37.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.37.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.37.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.38.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.38.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.38.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.39.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.39.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.39.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.40.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.40.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.40.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.41.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.41.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.41.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.42.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.42.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.42.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.43.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.43.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.43.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.44.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.44.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.44.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.45.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.45.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.45.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.46.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.46.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.46.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.47.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.47.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.47.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.48.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.48.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.48.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.49.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.49.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.49.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.50.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.50.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.50.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.51.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.51.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.51.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.52.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.52.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.52.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.53.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.53.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.53.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.54.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.54.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.54.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.55.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.55.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.55.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.56.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.56.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.56.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.57.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.57.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.57.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.58.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.58.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.58.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.59.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.59.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.59.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.60.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.60.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.60.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.61.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.61.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.61.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.62.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.62.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.62.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.63.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.63.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.63.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.64.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.64.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.64.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.65.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.65.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.65.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.66.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.66.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.66.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.67.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.67.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.67.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.68.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.68.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.68.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.69.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.69.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.69.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.70.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.70.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.70.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.71.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.71.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.71.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.72.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.72.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.72.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.73.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.73.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.73.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.74.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.74.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.74.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.75.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.75.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.75.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.76.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.76.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.76.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.77.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.77.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.77.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.78.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.78.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.78.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.79.gate_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.79.up_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.48.mlp.experts.79.down_proj.weight": "model-00049-of-000051.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.pre_mlp_layernorm.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.post_mlp_layernorm.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.self_attn.qkv_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.self_attn.k_layernorm.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.self_attn.param_sink_key": "model-00050-of-000051.safetensors",
+ "model.layers.49.self_attn.param_sink_value": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.gate.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.e_score_correction_bias": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.shared_experts.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.shared_experts.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.shared_experts.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.0.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.0.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.0.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.1.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.1.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.1.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.2.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.2.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.2.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.3.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.3.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.3.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.4.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.4.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.4.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.5.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.5.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.5.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.6.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.6.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.6.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.7.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.7.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.7.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.8.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.8.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.8.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.9.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.9.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.9.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.10.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.10.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.10.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.11.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.11.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.11.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.12.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.12.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.12.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.13.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.13.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.13.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.14.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.14.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.14.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.15.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.15.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.15.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.16.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.16.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.16.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.17.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.17.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.17.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.18.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.18.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.18.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.19.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.19.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.19.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.20.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.20.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.20.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.21.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.21.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.21.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.22.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.22.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.22.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.23.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.23.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.23.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.24.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.24.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.24.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.25.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.25.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.25.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.26.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.26.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.26.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.27.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.27.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.27.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.28.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.28.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.28.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.29.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.29.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.29.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.30.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.30.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.30.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.31.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.31.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.31.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.32.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.32.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.32.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.33.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.33.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.33.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.34.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.34.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.34.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.35.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.35.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.35.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.36.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.36.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.36.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.37.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.37.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.37.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.38.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.38.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.38.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.39.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.39.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.39.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.40.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.40.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.40.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.41.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.41.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.41.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.42.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.42.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.42.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.43.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.43.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.43.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.44.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.44.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.44.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.45.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.45.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.45.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.46.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.46.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.46.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.47.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.47.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.47.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.48.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.48.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.48.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.49.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.49.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.49.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.50.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.50.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.50.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.51.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.51.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.51.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.52.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.52.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.52.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.53.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.53.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.53.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.54.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.54.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.54.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.55.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.55.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.55.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.56.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.56.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.56.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.57.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.57.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.57.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.58.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.58.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.58.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.59.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.59.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.59.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.60.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.60.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.60.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.61.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.61.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.61.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.62.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.62.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.62.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.63.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.63.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.63.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.64.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.64.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.64.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.65.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.65.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.65.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.66.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.66.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.66.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.67.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.67.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.67.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.68.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.68.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.68.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.69.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.69.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.69.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.70.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.70.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.70.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.71.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.71.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.71.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.72.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.72.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.72.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.73.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.73.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.73.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.74.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.74.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.74.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.75.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.75.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.75.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.76.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.76.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.76.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.77.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.77.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.77.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.78.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.78.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.78.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.79.gate_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.79.up_proj.weight": "model-00050-of-000051.safetensors",
+ "model.layers.49.mlp.experts.79.down_proj.weight": "model-00050-of-000051.safetensors",
+ "model.norm.weight": "model-00050-of-000051.safetensors",
+ "lm_head.weight": "model-00050-of-000051.safetensors",
+ "model.layers.50.embed_tokens.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.enorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.hnorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.eh_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.shared_head.norm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.shared_head.head.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.pre_mlp_layernorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.post_mlp_layernorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.self_attn.qkv_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.self_attn.k_layernorm.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.self_attn.param_sink_key": "model-00051-of-000051.safetensors",
+ "model.layers.50.self_attn.param_sink_value": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.gate.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.e_score_correction_bias": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.shared_experts.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.shared_experts.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.shared_experts.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.0.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.0.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.0.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.1.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.1.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.1.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.2.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.2.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.2.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.3.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.3.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.3.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.4.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.4.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.4.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.5.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.5.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.5.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.6.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.6.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.6.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.7.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.7.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.7.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.8.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.8.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.8.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.9.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.9.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.9.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.10.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.10.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.10.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.11.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.11.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.11.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.12.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.12.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.12.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.13.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.13.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.13.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.14.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.14.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.14.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.15.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.15.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.15.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.16.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.16.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.16.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.17.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.17.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.17.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.18.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.18.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.18.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.19.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.19.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.19.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.20.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.20.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.20.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.21.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.21.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.21.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.22.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.22.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.22.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.23.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.23.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.23.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.24.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.24.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.24.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.25.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.25.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.25.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.26.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.26.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.26.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.27.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.27.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.27.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.28.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.28.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.28.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.29.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.29.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.29.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.30.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.30.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.30.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.31.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.31.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.31.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.32.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.32.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.32.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.33.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.33.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.33.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.34.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.34.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.34.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.35.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.35.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.35.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.36.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.36.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.36.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.37.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.37.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.37.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.38.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.38.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.38.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.39.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.39.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.39.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.40.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.40.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.40.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.41.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.41.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.41.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.42.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.42.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.42.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.43.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.43.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.43.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.44.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.44.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.44.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.45.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.45.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.45.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.46.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.46.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.46.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.47.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.47.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.47.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.48.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.48.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.48.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.49.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.49.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.49.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.50.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.50.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.50.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.51.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.51.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.51.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.52.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.52.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.52.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.53.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.53.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.53.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.54.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.54.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.54.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.55.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.55.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.55.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.56.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.56.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.56.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.57.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.57.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.57.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.58.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.58.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.58.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.59.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.59.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.59.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.60.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.60.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.60.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.61.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.61.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.61.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.62.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.62.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.62.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.63.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.63.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.63.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.64.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.64.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.64.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.65.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.65.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.65.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.66.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.66.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.66.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.67.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.67.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.67.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.68.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.68.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.68.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.69.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.69.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.69.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.70.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.70.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.70.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.71.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.71.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.71.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.72.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.72.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.72.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.73.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.73.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.73.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.74.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.74.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.74.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.75.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.75.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.75.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.76.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.76.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.76.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.77.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.77.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.77.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.78.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.78.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.78.down_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.79.gate_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.79.up_proj.weight": "model-00051-of-000051.safetensors",
+ "model.layers.50.mlp.experts.79.down_proj.weight": "model-00051-of-000051.safetensors"
+ }
+}
\ No newline at end of file
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenization_openpangu.py b/tokenization_openpangu.py
new file mode 100644
index 0000000000000000000000000000000000000000..980d0cb1eb116c05c8f72f738c0bb7b64ff6e4cc
--- /dev/null
+++ b/tokenization_openpangu.py
@@ -0,0 +1,273 @@
+# coding=utf-8
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from shutil import copyfile
+from typing import Any, Dict, List, Optional, Tuple
+
+import sentencepiece as spm
+
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)  # module-level logger obtained through transformers' logging helper
+
+VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}  # vocab file name; save_vocabulary reuses it for the output path
+
+PRETRAINED_VOCAB_FILES_MAP = {}  # empty mapping; exposed on the class as pretrained_vocab_files_map
+
+
+def convert_bool(string):
+    """Map the strings "true" / "false" (any letter case) to booleans.
+
+    Every other value, string or not, is returned unchanged.
+    """
+    if not isinstance(string, str):
+        return string
+    lowered = string.lower()
+    known = {"true": True, "false": False}
+    return known.get(lowered, string)
+
+
+class OpenPanguTokenizer(PreTrainedTokenizer):
+    """
+    Tokenizer backed by a SentencePiece model loaded from `vocab_file`.
+
+    Args:
+        vocab_file (`str`):
+            Path to the SentencePiece model file.
+        add_bos_token / add_eos_token (`bool` or `str`):
+            Whether `build_inputs_with_special_tokens` adds the BOS / EOS id.
+            The strings "true" / "false" (any letter case) are read as booleans.
+    """
+
+    vocab_files_names = VOCAB_FILES_NAMES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    model_input_names = ["input_ids", "attention_mask"]
+    _auto_class = "AutoTokenizer"
+
+    def __init__(
+        self,
+        vocab_file,
+        unk_token="",
+        bos_token="",
+        eos_token="",
+        pad_token="",
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        decode_with_prefix_space=False,
+        clean_up_tokenization_spaces=False,
+        **kwargs,
+    ):
+        # NOTE(review): the four special-token defaults above are empty strings;
+        # confirm this is intended and not markup lost from the original file.
+        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
+        # Load the model before super().__init__(), presumably because the base
+        # initializer may query the vocabulary (TODO confirm). One load suffices.
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
+        super().__init__(
+            bos_token=bos_token,
+            eos_token=eos_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            **kwargs,
+        )
+        self.vocab_file = vocab_file
+        # Normalize both flags alike so that the string "false" is not truthy.
+        self.add_bos_token = convert_bool(add_bos_token)
+        self.add_eos_token = convert_bool(add_eos_token)
+        self.decode_with_prefix_space = decode_with_prefix_space
+        self._no_prefix_space_tokens = None
+
+    @property
+    def no_prefix_space_tokens(self):
+        """Ids of vocabulary pieces that do not start with "▁"; built on first use."""
+        if self._no_prefix_space_tokens is None:
+            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
+            self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")}
+        return self._no_prefix_space_tokens
+
+    @property
+    def vocab_size(self):
+        """Returns vocab size"""
+        return self.sp_model.get_piece_size()
+
+    @property
+    def bos_token_id(self) -> Optional[int]:
+        """BOS id as reported by the SentencePiece model itself."""
+        return self.sp_model.bos_id()
+
+    @property
+    def eos_token_id(self) -> Optional[int]:
+        return super().eos_token_id
+
+    def get_vocab(self):
+        """Returns vocab as a dict"""
+        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+
+    def _tokenize(self, text):
+        """Returns a tokenized string."""
+        return self.sp_model.encode(text, out_type=str)
+
+    def _convert_token_to_id(self, token):
+        """Converts a token (str) in an id using the vocab."""
+        return self.sp_model.piece_to_id(token)
+
+    def _convert_id_to_token(self, index):
+        """Converts an index (integer) in a token (str) using the vocab."""
+        return self.sp_model.IdToPiece(index)
+
+    def _maybe_add_prefix_space(self, tokens, decoded):
+        # NOTE(review): `no_prefix_space_tokens` holds integer ids, while
+        # `convert_tokens_to_string` passes token strings, so this test looks always
+        # true for non-empty input. Kept as-is: the caller strips one character again.
+        if tokens and tokens[0] not in self.no_prefix_space_tokens:
+            return " " + decoded
+        return decoded
+
+    def convert_tokens_to_string(self, tokens):
+        """Converts a sequence of tokens (string) in a single string."""
+        # Build the lookup once instead of re-reading the property for every token.
+        special_tokens = set(self.all_special_tokens)
+        current_sub_tokens = []
+        out_string = ""
+        for token in tokens:
+            # make sure that special tokens are not decoded using sentencepiece model
+            if token in special_tokens:
+                # Decode the current sub-tokens first
+                if current_sub_tokens:
+                    out_string += self.sp_model.decode(current_sub_tokens)
+                    current_sub_tokens = []
+                # Append the special token without adding extra spaces
+                out_string += token
+            else:
+                current_sub_tokens.append(token)
+        # Decode any remaining sub-tokens
+        if current_sub_tokens:
+            out_string += self.sp_model.decode(current_sub_tokens)
+        # Optional clean-up pass, only when the tokenizer was configured for it
+        if self.clean_up_tokenization_spaces:
+            out_string = self.clean_up_tokenization(out_string)
+        out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
+        return out_string[1:]
+
+    # Override decode so that spaces_between_special_tokens defaults to False
+    def decode(self, token_ids, spaces_between_special_tokens: bool = False, **kwargs):
+        return super().decode(
+            token_ids=token_ids,
+            spaces_between_special_tokens=spaces_between_special_tokens,
+            **kwargs,
+        )
+
+    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        """
+        Save the vocabulary and special tokens file to a directory.
+
+        Args:
+            save_directory (`str`):
+                The directory in which to save the vocabulary.
+            filename_prefix (`str`, *optional*):
+                Prefix joined with "-" in front of the saved file name.
+
+        Returns:
+            `Tuple(str)`: Paths to the files saved.
+        """
+        if not os.path.isdir(save_directory):
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
+            return ("",)
+        # Use the base name only: the configured value starts with "./", which would
+        # turn "<prefix>-" + name into a path below a folder that does not exist.
+        vocab_name = os.path.basename(VOCAB_FILES_NAMES["vocab_file"])
+        out_vocab_file = os.path.join(save_directory, (filename_prefix + "-" if filename_prefix else "") + vocab_name)
+        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+        elif not os.path.isfile(self.vocab_file):
+            with open(out_vocab_file, "wb") as fi:
+                content_spiece_model = self.sp_model.serialized_model_proto()
+                fi.write(content_spiece_model)
+
+        return (out_vocab_file,)
+
+    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
+        """Prepend BOS and append EOS (each at most once) according to the add_* flags."""
+        bos_token_ids = [self.bos_token_id] if self.add_bos_token else []
+        output = bos_token_ids + token_ids_0
+        if token_ids_1 is not None:
+            output = output + token_ids_1
+        if self.add_eos_token:
+            output = output + [self.eos_token_id]
+        return output
+
+    def get_special_tokens_mask(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
+    ) -> List[int]:
+        """
+        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
+        special tokens using the tokenizer `prepare_for_model` method.
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+
+        Returns:
+            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
+            )
+
+        # Mirror build_inputs_with_special_tokens: one optional BOS in front, one
+        # optional EOS at the very end, and nothing between the two sequences.
+        bos_mask = [1] if self.add_bos_token else []
+        eos_mask = [1] if self.add_eos_token else []
+        n_tokens = len(token_ids_0) + (len(token_ids_1) if token_ids_1 is not None else 0)
+        return bos_mask + [0] * n_tokens + eos_mask
+
+    def create_token_type_ids_from_sequences(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """
+        Create the token type ids for one sequence or a pair of sequences. All ids are zero; the list
+        length matches what `build_inputs_with_special_tokens` returns for the same arguments.
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+
+        Returns:
+            `List[int]`: List of zeros.
+        """
+        n_special = int(bool(self.add_bos_token)) + int(bool(self.add_eos_token))
+        n_tokens = len(token_ids_0) + (len(token_ids_1) if token_ids_1 is not None else 0)
+        return [0] * (n_tokens + n_special)
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..14ca2f88ae0f546bcaabe8ae5f35eb5134d5a77d
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b16f1558c0cd4ae6ef1a2c605713be0a514f50e1ce2d2c878979ce988c148ec
+size 2477809
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1fb8f9f976b1bd98ac5b73ae63c8877944be7f1
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1 @@
+{"add_bos_token": true, "add_eos_token": false, "add_prefix_space": true, "added_tokens_decoder": {"0": {"content": "", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "1": {"content": "", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "2": {"content": "", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45806": {"content": "<|User|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45813": {"content": "<|Bot|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45830": {"content": "[unused0]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45840": {"content": "[unused1]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45846": {"content": "[unused2]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45849": {"content": "[unused3]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45861": {"content": "[unused4]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45866": {"content": "[unused5]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45874": {"content": "[unused6]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45883": {"content": "[unused7]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45884": {"content": "[unused8]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45887": {"content": "[unused9]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, 
"45892": {"content": "[unused10]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45920": {"content": "[unused11]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45932": {"content": "[unused12]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45938": {"content": "[unused13]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45953": {"content": "[unused14]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45968": {"content": "[unused15]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45974": {"content": "[unused16]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45982": {"content": "[unused17]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45986": {"content": "[unused18]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46005": {"content": "[unused19]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46007": {"content": "[unused20]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46014": {"content": "[unused21]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46017": {"content": "[unused22]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46028": {"content": "[unused23]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46032": {"content": "[unused24]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46081": {"content": "[unused25]", "lstrip": 
false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46086": {"content": "[unused26]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46101": {"content": "[unused27]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46183": {"content": "[unused28]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46230": {"content": "[unused29]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46245": {"content": "[unused30]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46257": {"content": "[unused31]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144208": {"content": "[unused32]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144209": {"content": "[unused33]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}}, "auto_map": {"AutoTokenizer": ["tokenization_openpangu.OpenPanguTokenizer", null]}, "bos_token": "", "clean_up_tokenization_spaces": false, "eos_token": "[unused10]", "legacy": true, "model_max_length": 1000000000000000019884624838656, "pad_token": null, "sp_model_kwargs": {}, "spaces_between_special_tokens": false, "tokenizer_class": "OpenPanguTokenizer", "unk_token": "", "use_default_system_prompt": false, "chat_template": "{%- set ns = namespace(is_first_tool=true) %}\n{%- if not mcp_prompt is defined %}\n {%- set mcp_prompt = true %}\n{%- endif %}\n{%- if not background is defined %}\n {%- set background = none %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not think is defined %}\n {%- set think = true %}\n{%- endif %}\n{%- if not reasoning_effort is defined %}\n {%- set reasoning_effort = \"high\" 
%}\n{%- endif %}\n\n{{- '[unused9]系统:' -}}\n{#- 提取系统消息 #}\n{%- set system_message = \"\" %}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- endif %}\n{#- 如果传入工具将使用mcp人设,可以使用mcp_prompt字段禁用 #}\n{%- if mcp_prompt and tools %}\n {%- if system_message %}\n {%- set system_message = system_message + \"\n\" %}\n {%- endif %}\n {%- set system_message = system_message + \"你是一个能够调用外部工具解决问题的专家,你的目标是高效、准确、清晰地完成任务。\n你需要根据用户的问题,决定是否需要使用工具来完成任务。如果需要,请以明确的格式调用工具;如果不需要,请直接回答。\n你可以根据上下文决定是否继续调用工具或基于已有结果直接回答用户。如果工具调用已足够,请合理组织语言向用户汇报结论。在没有获得显式的调用结果之前,在调用工具的当轮回复之内严禁虚构或者假设一个工具调用结果来完成任务或者回答问题。也不应在没有返回工具调用信息的情况下,在调用工具的当轮假设或者明确声称工具执行成功。\" %}\n{%- endif %}\n{#- 思维链分档 #}\n{%- if reasoning_effort == \"low\" and think %}\n {%- if system_message or tools or background %}\n {%- set system_message = \"\n\neffort: compact\n\n\n\n\" + system_message %}\n {%- else %}\n {%- set system_message = \"\n\neffort: compact\n\n\" %}\n {%- endif %}\n{%- endif %}\n{{- system_message -}}\n\n{#- 工具使用描述和规范调用格式 #}\n{%- if tools %}\n {{- '\n你将在标签对内获得每个工具的描述:\n\n' }}\n {{- tools | tojson(ensure_ascii=False, sort_keys=False) }}\n {{- '\n\n' }}\n {{- \"对于每个函数调用,返回一个 JSON 对象,放在 [unused11][unused12] 标签对中,多个调用组成一个列表,其中每个函数包含函数名和对应函数的参数,格式如下:\n\" }}\n {{- '[unused11]\n[{\"name\": \"<函数名1>\", \"arguments\": }, {\"name\": \"<函数名2>\", \"arguments\": }, ...]\n[unused12]' }}\n {{- '\n<工具使用原则>\n1. 只有在所有必填参数(required字段中列出的)都具备有效值时,才能调用该函数\n2. 如果缺少任何必填参数,必须向用户询问缺失的参数,而不是直接调用函数\n3. 
可选参数如果没有提供可以忽略或使用默认值\n工具使用原则>' }}\n{%- endif %}\n\n{#- 背景信息字段 #}\n{%- if background is not none and background -%}\n {{- '\n<背景信息>' -}}\n {{- background -}}\n {{- '背景信息>' -}}\n{%- endif %}\n\n{%- if messages | length == 0 and not think %}\n {{- \" /no_think\" -}}\n{%- endif %}\n{{- '[unused10]' -}}\n\n{%- if messages | length != 0 %}\n {%- for message in messages[:-1] %}\n {%- if message['role'] == 'user' %}\n {{- '[unused9]用户:' + message['content'] -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n\n {%- if message['role'] == 'assistant' %}\n {{- '[unused9]助手:[unused16][unused17]' -}}\n {{- message['content'] -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- '[unused10]' }}\n {%- endif %}\n\n {%- if message['role'] 
== 'tool' %}\n {{- '[unused9]' -}}\n {{- '工具:' + message['content'] + \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- endfor %}\n\n {#- 处理最后一个角色,判断快慢思考 #}\n {%- if messages[-1]['role'] == \"user\" %}\n {{- '[unused9]' -}}\n {{- '用户:' + messages[-1]['content'] -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"tool\" %}\n {{- '[unused9]' -}}\n {{- '工具:' + messages[-1]['content'] -}}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"assistant\" %}\n {{- '[unused9]' -}}\n {{- '助手:[unused16][unused17]' + messages[-1]['content'] -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n 
{{- ']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n{%- endif %}\n\n{{-'[unused9]助手:' }}\n"}
\ No newline at end of file