Tnt3o5 commited on Feb 3

Commit

d4dc785

verified ·

1 Parent(s): 746fe62

Upload folder using huggingface_hub

Browse files

Files changed (17) hide show

.gitattributes +1 -0
LICENSE +31 -0
README.md +5 -0
STUDENT_LICENSE +13 -0
TEACHER_LICENSE +114 -0
added_tokens.json +28 -0
chat_template.jinja +61 -0
config.json +72 -0
generation_config.json +13 -0
merges.txt +0 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +406 -0
special_tokens_map.json +38 -0
tokenizer.json +3 -0
tokenizer_config.json +240 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,31 @@

+GENERAL TERMS AND CONDITIONS
+Note that if you want to use the Commercial licence, please contact us at contact@distillabs.ai
+- Model License Terms -
+R&D License
+1. SERVICES, PRICES AND PAYMENT
+    1.1 The Customer pays a one-time license fee, as indicated in the check-out process, for running of one (1) training process of the selected Base Model using Customer Data (“License Fee”).
+    1.2 The License Fee shall be due for payment in advance. The Customer shall only be permitted to set off against payment claims of Distil Labs if the Customer’s claims are undisputed or have become res judicata.
+2. MODEL LICENSE: R&D LICENSE
+    2.1 Subject to Customer’s payment of the license fee, Distil Labs grants to Customer the Model License (as defined below). For clarification, Distil Labs retains any other rights in its software or know- how, in particular in the codebase needed for the fine-tuning of the Trained Model.
+    2.2 Subject to the requirements of the Base Model License (cf. Section 2.5 below), Distil Labs transfers to the Customer the perpetual, non-exclusive usage right to the Trained Model for non-commercial purposes of prototyping and research & development. The Parties agree, that commercial purposes include deployment in production externally (to be used by Customer’s customers paid or free of charge) or internally (as a tool for Customer’s employees). The territorial scope of the license is limited to the use within the United States of America and the European Economic Area including all member states of the European Union (“Model License”).
+    2.3 The Model License for non-commercial purposes of prototyping and research & development shall include (i) the non-exclusive right to permanent or temporary reproduction, in whole or in part, by any means and in any form (e.g. permanent and/or volatile storage on electrical, electromagnetic, optical storage media, such as any type of SDD, HDD, DVD, memory cards, USB sticks), (ii) the non-exclusive right to distribution in any form, media and by any means regardless of whether the distribution is in tangible or intangible form, in particular to transmit the Trained Model via wired and wireless networks (e.g. for download from internet or intranet by wire or wireless means including broadband, cable, fiberglass, WIFI, LTE, 5G, satellite internet, other data networks), and (iii) the non-exclusive right of making available to the public in such a way that members of the public can access it from places and at times of their choice (e.g. by web or mobile app, virtual or augmented reality, cloud storage, cloud hosting, decentralized hosting, non-fungible token, application service providing, software as a service, or cloud computing). The license shall also contain, to the extent necessary for prototyping and research & development, the right to adapt and modify the Trained Model subject to the limitation in Section 2.4 and 2.5 below, to further develop the Trained Model including changes to functions or appearance, adapt to other software versions, to exchange parts of the Trained Model or combine the Trained Model with other results of work and to use the results in the same way as the original Trained Model. Any derived models from the Trained Model shall retain this model license.
+    2.4 The Customer shall not, without the prior written consent of Distil Labs:
+        2.4.1 train, fine-tune, re-train, or otherwise modify the Trained Model, unless for purpose of research & development;
+        2.4.2 use the Trained Model or any part thereof to create derivative models or services that compete with those of Distil Labs;
+        2.4.3 circumvent any technical restrictions embedded in the Trained Model or Base Model that are designed to enforce usage limitations.
+    2.5 The Parties acknowledge and agree that the Trained Model is developed from Base Models which are supplied by a third party. Therefore, the Model License is subject to the restrictions resulting from the open-source or any other applicable license of the Base Model (“Base Model License”) and the Customer must use the Trained Model in compliance with the Base Model License. In particular, the Customer must oblige their clients to compliance with the Base Model License in any case of transferring or sublicensing the rights to or making available in any way the Trained Model. The applicable Base Model License is defined in the Training Configuration and will be provided for download. The Customer agrees to indemnify Distil Labs for any and all claims brought by the Base Model provider for violations of the Base Model License.

README.md ADDED Viewed

	@@ -0,0 +1,5 @@

+---
+    library_name: transformers
+    tags: []
+    ---

STUDENT_LICENSE ADDED Viewed

	@@ -0,0 +1,13 @@

+Copyright 2023 Qwen
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

TEACHER_LICENSE ADDED Viewed

	@@ -0,0 +1,114 @@

+LLAMA 3.1 COMMUNITY LICENSE AGREEMENT
+Llama 3.1 Version Release Date: July 23, 2024
+“Agreement” means the terms and conditions for use, reproduction, distribution and modification of the
+Llama Materials set forth herein.
+“Documentation” means the specifications, manuals and documentation accompanying Llama 3.1
+distributed by Meta at https://llama.meta.com/doc/overview.
+“Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into
+this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or
+regulations to provide legal consent and that has legal authority to bind your employer or such other
+person or entity if you are entering in this Agreement on their behalf.
+“Llama 3.1” means the foundational large language models and software and algorithms, including
+machine-learning model code, trained model weights, inference-enabling code, training-enabling code,
+fine-tuning enabling code and other elements of the foregoing distributed by Meta at
+https://llama.meta.com/llama-downloads.
+“Llama Materials” means, collectively, Meta’s proprietary Llama 3.1 and Documentation (and any
+portion thereof) made available under this Agreement.
+“Meta” or “we” means Meta Platforms Ireland Limited (if you are located in or, if you are an entity, your
+principal place of business is in the EEA or Switzerland) and Meta Platforms, Inc. (if you are located
+outside of the EEA or Switzerland).
+By clicking “I Accept” below or by using or distributing any portion or element of the Llama Materials,
+you agree to be bound by this Agreement.
+1. License Rights and Redistribution.
+  a. Grant of Rights. You are granted a non-exclusive, worldwide, non-transferable and royalty-free
+limited license under Meta’s intellectual property or other rights owned by Meta embodied in the Llama
+Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the
+Llama Materials.
+  b. Redistribution and Use.
+      i. If you distribute or make available the Llama Materials (or any derivative works
+thereof), or a product or service (including another AI model) that contains any of them, you shall (A)
+provide a copy of this Agreement with any such Llama Materials; and (B) prominently display “Built with
+Llama” on a related website, user interface, blogpost, about page, or product documentation. If you use
+the Llama Materials or any outputs or results of the Llama Materials to create, train, fine tune, or
+otherwise improve an AI model, which is distributed or made available, you shall also include “Llama” at
+the beginning of any such AI model name.
+      ii. If you receive Llama Materials, or any derivative works thereof, from a Licensee as part
+of an integrated end user product, then Section 2 of this Agreement will not apply to you.
+      iii. You must retain in all copies of the Llama Materials that you distribute the following
+attribution notice within a “Notice” text file distributed as a part of such copies: “Llama 3.1 is
+licensed under the Llama 3.1 Community License, Copyright © Meta Platforms, Inc. All Rights
+Reserved.”
+      iv. Your use of the Llama Materials must comply with applicable laws and regulations
+(including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Llama
+Materials (available at https://llama.meta.com/llama3_1/use-policy), which is hereby incorporated by
+reference into this Agreement.
+2. Additional Commercial Terms. If, on the Llama 3.1 version release date, the monthly active users
+of the products or services made available by or for Licensee, or Licensee’s affiliates, is greater than 700
+million monthly active users in the preceding calendar month, you must request a license from Meta,
+which Meta may grant to you in its sole discretion, and you are not authorized to exercise any of the
+rights under this Agreement unless or until Meta otherwise expressly grants you such rights.
+3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY
+OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF
+ANY KIND, AND META DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED,
+INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR
+DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE LLAMA MATERIALS AND
+ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND
+RESULTS.
+4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING
+OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL,
+INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF META OR ITS AFFILIATES HAVE BEEN ADVISED
+OF THE POSSIBILITY OF ANY OF THE FOREGOING.
+5. Intellectual Property.
+  a. No trademark licenses are granted under this Agreement, and in connection with the Llama
+Materials, neither Meta nor Licensee may use any name or mark owned by or associated with the other
+or any of its affiliates, except as required for reasonable and customary use in describing and
+redistributing the Llama Materials or as set forth in this Section 5(a). Meta hereby grants you a license to
+use “Llama” (the “Mark”) solely as required to comply with the last sentence of Section 1.b.i. You will
+comply with Meta’s brand guidelines (currently accessible at
+https://about.meta.com/brand/resources/meta/company-brand/ ). All goodwill arising out of your use
+of the Mark will inure to the benefit of Meta.
+  b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for Meta, with
+respect to any derivative works and modifications of the Llama Materials that are made by you, as
+between you and Meta, you are and will be the owner of such derivative works and modifications.
+  c. If you institute litigation or other proceedings against Meta or any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Llama Materials or Llama 3.1 outputs or
+results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other
+rights owned or licensable by you, then any licenses granted to you under this Agreement shall
+terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold
+harmless Meta from and against any claim by any third party arising out of or related to your use or
+distribution of the Llama Materials.
+6. Term and Termination. The term of this Agreement will commence upon your acceptance of this
+Agreement or access to the Llama Materials and will continue in full force and effect until terminated in
+accordance with the terms and conditions herein. Meta may terminate this Agreement if you are in
+breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete
+and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the termination of this
+Agreement.
+7. Governing Law and Jurisdiction. This Agreement will be governed and construed under the laws of
+the State of California without regard to choice of law principles, and the UN Convention on Contracts
+for the International Sale of Goods does not apply to this Agreement. The courts of California shall have
+exclusive jurisdiction of any dispute arising out of this Agreement.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,61 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,72 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token": "<|endoftext|>",
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token": "<|im_end|>",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token": "<|endoftext|>",
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.57.0"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c21edff7f75fe15e1704278e953a412d016ad942d3d28a3bbc18135bda794c29
+size 4967215360

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75b654ac33e833a4e5d4f84c427ce5ee344fc3872e01d4ce8930b8ac00a69291
+size 3077766632

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,406 @@

+{
+  "metadata": {
+    "total_parameters": 4022468096,
+    "total_size": 8044936192
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.norm.weight": "model-00002-of-00002.safetensors"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,240 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 1010000,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff