Priyansu19 commited on
Commit
dfcebf2
·
verified ·
1 Parent(s): 3b35b9e

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GENERAL TERMS AND CONDITIONS
2
+
3
+ Note that if you want to use the Commercial licence, please contact us at contact@distillabs.ai
4
+
5
+ - Model License Terms -
6
+
7
+ R&D License
8
+
9
+ 1. SERVICES, PRICES AND PAYMENT
10
+
11
+ 1.1 The Customer pays a one-time license fee, as indicated in the check-out process, for running of one (1) training process of the selected Base Model using Customer Data (“License Fee”).
12
+
13
+ 1.2 The License Fee shall be due for payment in advance. The Customer shall only be permitted to set off against payment claims of Distil Labs if the Customer’s claims are undisputed or have become res judicata.
14
+
15
+ 2. MODEL LICENSE: R&D LICENSE
16
+
17
+ 2.1 Subject to Customer’s payment of the license fee, Distil Labs grants to Customer the Model License (as defined below). For clarification, Distil Labs retains any other rights in its software or know- how, in particular in the codebase needed for the fine-tuning of the Trained Model.
18
+
19
+ 2.2 Subject to the requirements of the Base Model License (cf. Section 2.5 below), Distil Labs transfers to the Customer the perpetual, non-exclusive usage right to the Trained Model for non-commercial purposes of prototyping and research & development. The Parties agree, that commercial purposes include deployment in production externally (to be used by Customer’s customers paid or free of charge) or internally (as a tool for Customer’s employees). The territorial scope of the license is limited to the use within the United States of America and the European Economic Area including all member states of the European Union (“Model License”).
20
+
21
+ 2.3 The Model License for non-commercial purposes of prototyping and research & development shall include (i) the non-exclusive right to permanent or temporary reproduction, in whole or in part, by any means and in any form (e.g. permanent and/or volatile storage on electrical, electromagnetic, optical storage media, such as any type of SDD, HDD, DVD, memory cards, USB sticks), (ii) the non-exclusive right to distribution in any form, media and by any means regardless of whether the distribution is in tangible or intangible form, in particular to transmit the Trained Model via wired and wireless networks (e.g. for download from internet or intranet by wire or wireless means including broadband, cable, fiberglass, WIFI, LTE, 5G, satellite internet, other data networks), and (iii) the non-exclusive right of making available to the public in such a way that members of the public can access it from places and at times of their choice (e.g. by web or mobile app, virtual or augmented reality, cloud storage, cloud hosting, decentralized hosting, non-fungible token, application service providing, software as a service, or cloud computing). The license shall also contain, to the extent necessary for prototyping and research & development, the right to adapt and modify the Trained Model subject to the limitation in Section 2.4 and 2.5 below, to further develop the Trained Model including changes to functions or appearance, adapt to other software versions, to exchange parts of the Trained Model or combine the Trained Model with other results of work and to use the results in the same way as the original Trained Model. Any derived models from the Trained Model shall retain this model license.
22
+
23
+ 2.4 The Customer shall not, without the prior written consent of Distil Labs:
24
+
25
+ 2.4.1 train, fine-tune, re-train, or otherwise modify the Trained Model, unless for purpose of research & development;
26
+
27
+ 2.4.2 use the Trained Model or any part thereof to create derivative models or services that compete with those of Distil Labs;
28
+
29
+ 2.4.3 circumvent any technical restrictions embedded in the Trained Model or Base Model that are designed to enforce usage limitations.
30
+
31
+ 2.5 The Parties acknowledge and agree that the Trained Model is developed from Base Models which are supplied by a third party. Therefore, the Model License is subject to the restrictions resulting from the open-source or any other applicable license of the Base Model (“Base Model License”) and the Customer must use the Trained Model in compliance with the Base Model License. In particular, the Customer must oblige their clients to compliance with the Base Model License in any case of transferring or sublicensing the rights to or making available in any way the Trained Model. The applicable Base Model License is defined in the Training Configuration and will be provided for download. The Customer agrees to indemnify Distil Labs for any and all claims brought by the Base Model provider for violations of the Base Model License.
README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags: []
4
+ ---
5
+
STUDENT_LICENSE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2023 Qwen
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
TEACHER_LICENSE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2025 OpenAI
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token": "<|endoftext|>",
8
+ "bos_token_id": 151643,
9
+ "dtype": "bfloat16",
10
+ "eos_token": "<|im_end|>",
11
+ "eos_token_id": 151645,
12
+ "head_dim": 128,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 4096,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 12288,
17
+ "layer_types": [
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention"
54
+ ],
55
+ "max_position_embeddings": 40960,
56
+ "max_window_layers": 36,
57
+ "model_type": "qwen3",
58
+ "num_attention_heads": 32,
59
+ "num_hidden_layers": 36,
60
+ "num_key_value_heads": 8,
61
+ "pad_token": "<|endoftext|>",
62
+ "pad_token_id": 151643,
63
+ "rms_norm_eps": 1e-06,
64
+ "rope_scaling": null,
65
+ "rope_theta": 1000000,
66
+ "sliding_window": null,
67
+ "tie_word_embeddings": false,
68
+ "transformers_version": "4.57.0",
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.0"
13
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:377e3986e10351541c22baf7f2ef338eb4174fb3f917a8efbe8e33684c5c9232
3
+ size 4902257696
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c9aaa9daf6a6cc1ea93f01c5ca264d9127faa3e98502e58e2d7a49f44c6ca6
3
+ size 4915960368
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16054d6c580a5c3e7a81d0f1499f1699cd6fae97727cb18014ba4376d74d218
3
+ size 4983068496
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb73f466fade5716702bda38d4e3b321c9358c39889e46fb9d613fb038bfcb2f
3
+ size 1580230264
model.safetensors.index.json ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 8190735360,
4
+ "total_size": 16381470720
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
31
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
53
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
55
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
65
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
67
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
77
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
79
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
86
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
89
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
98
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
101
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
103
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
115
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
122
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
125
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
127
+ "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
130
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
133
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
134
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
137
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
139
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
141
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
142
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
143
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
144
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
145
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
146
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
147
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
148
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
149
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
150
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
151
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
152
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
153
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
154
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
155
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
156
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
157
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
158
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
159
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
160
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
161
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
162
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
163
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
164
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
165
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
166
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
167
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
168
+ "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
169
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
170
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
171
+ "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
172
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
173
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
174
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
175
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
180
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
181
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
182
+ "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
183
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
184
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
185
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
187
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
194
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
197
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
199
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
218
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
221
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
223
+ "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
233
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
235
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
242
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
245
+ "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
247
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
254
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
257
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
259
+ "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
261
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
262
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
274
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
275
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
276
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
277
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
278
+ "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
279
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
280
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
281
+ "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
282
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
283
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
284
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
285
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
286
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
287
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
288
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
289
+ "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
290
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
291
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
292
+ "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
293
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
294
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
295
+ "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
296
+ "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
297
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
298
+ "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
299
+ "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
300
+ "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
301
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
302
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
304
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
305
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
306
+ "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
307
+ "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
308
+ "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
309
+ "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
310
+ "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
311
+ "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
312
+ "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
313
+ "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
314
+ "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
315
+ "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
316
+ "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
317
+ "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
318
+ "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
319
+ "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
320
+ "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
321
+ "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
322
+ "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
323
+ "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
324
+ "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
325
+ "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
326
+ "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
327
+ "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
328
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
329
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
330
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
331
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
332
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
333
+ "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
334
+ "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
335
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
336
+ "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
337
+ "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
338
+ "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
339
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
340
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
341
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
342
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
343
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
344
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
345
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
346
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
347
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
348
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
349
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
350
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
351
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
352
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
353
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
354
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
355
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
356
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
357
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
358
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
359
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
360
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
361
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
362
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
363
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
364
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
365
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
366
+ "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
367
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
368
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
369
+ "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
370
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
371
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
372
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
373
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
374
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
375
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
376
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
377
+ "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
378
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
379
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
380
+ "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
381
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
382
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
383
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
384
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
385
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
386
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
387
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
388
+ "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
389
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
390
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
391
+ "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
392
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
393
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
394
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
395
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
396
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
397
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
398
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
399
+ "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
400
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
401
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
402
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
403
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
404
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
405
+ "model.norm.weight": "model-00004-of-00004.safetensors"
406
+ }
407
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "bos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "eos_token": {
25
+ "content": "<|im_end|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "pad_token": {
32
+ "content": "<|endoftext|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ }
38
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": "<|endoftext|>",
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "padding_side": "left",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff