Babsie hjc-puro commited on
Commit
41d9f24
·
0 Parent(s):

Duplicate from NousResearch/Hermes-4-14B

Browse files

Co-authored-by: Roger Jin <hjc-puro@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-14B
3
+ language:
4
+ - en
5
+ library_name: transformers
6
+ license: apache-2.0
7
+ pipeline_tag: text-generation
8
+ tags:
9
+ - Qwen-3-14B
10
+ - instruct
11
+ - finetune
12
+ - reasoning
13
+ - hybrid-mode
14
+ - chatml
15
+ - function calling
16
+ - tool use
17
+ - json mode
18
+ - structured outputs
19
+ - atropos
20
+ - dataforge
21
+ - long context
22
+ - roleplaying
23
+ - chat
24
+ widget:
25
+ - example_title: Hermes 4
26
+ messages:
27
+ - role: system
28
+ content: You are Hermes 4, a capable, neutrally-aligned assistant. Prefer concise,
29
+ correct answers.
30
+ - role: user
31
+ content: Explain the difference between BFS and DFS to a new CS student.
32
+ model-index:
33
+ - name: Hermes-4-Qwen-3-14B
34
+ results: []
35
+ ---
36
+
37
+ # Hermes 4 — Qwen 3 14B
38
+
39
+ [📚 Paper (Hugging Face)](https://huggingface.co/papers/2508.18255) | [📚 Paper (arXiv)](https://arxiv.org/abs/2508.18255) | [🌐 Project Page](https://huggingface.co/collections/NousResearch/hermes-4-collection-68a731bfd452e20816725728) | [💻 GitHub Repository](https://github.com/NousResearch/Hermes-4-14B)
40
+
41
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/7B7nMvHJiL72QzVBEPKOG.png)
42
+
43
+ ## Model Description
44
+
45
+ Hermes 4 14B is a frontier, hybrid-mode **reasoning** model by Nous Research, based on Qwen 3 14B, that is aligned to **you**.
46
+
47
+ Chat with Hermes in Nous Chat: https://chat.nousresearch.com
48
+
49
+ Training highlights include a newly synthesized post-training corpus emphasizing verified reasoning traces, massive improvements in math, code, STEM, logic, creativity, and format-faithful outputs, while preserving general assistant quality and broadly neutral alignment.
50
+
51
+
52
+ ## What’s new vs Hermes 3
53
+
54
+ - **Post-training corpus**: Massively increased dataset size from 1M samples and 1.2B tokens to **~5M samples / ~60B tokens** blended across reasoning and non-reasoning data.
55
+ - **Hybrid reasoning mode** with explicit `<think>…</think>` segments when the model decides to deliberate, and options to make your responses faster when you want.
56
+ - **Reasoning** that is top quality, expressive, improves math, code, STEM, logic, and even creative writing and subjective responses.
57
+ - **Schema adherence & structured outputs**: trained to produce valid JSON for given schemas and to repair malformed objects.
58
+ - **Much easier to steer and align**: extreme improvements on steerability, especially on reduced refusal rates.
59
+
60
+ ## Our Mission: Frontier Capabilities Aligned to You
61
+
62
+ In pursuit of our mission of producing models that are open, steerable, and capable of producing the full range of human expression, while being able to be aligned to your values, we created a new benchmark, RefusalBench, that tests the model's willingness to be helpful in a variety of scenarios commonly disallowed by closed and open models.
63
+
64
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/t_HvRYPEHV0pc8iS2zHHn.png)
65
+
66
+ Hermes 4 achieves SOTA on RefusalBench across all popular closed and open models in being helpful and conforming to your values, without censorship.
67
+
68
+ ## Benchmarks (Hermes 4 14B)
69
+
70
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/uzbo2sGy_3PkVMJLDlPwV.png)
71
+
72
+ > Full tables, settings, and comparisons are in the technical report.
73
+
74
+ ## Prompt Format
75
+
76
+ Hermes 4 uses ChatML format with role headers and special tags.
77
+
78
+ **Basic chat:**
79
+ ```
80
+ <|im_start|>system
81
+
82
+ You are Hermes 4. Be concise and helpful.<|im_end|>
83
+ <|im_start|>user
84
+
85
+ Explain the photoelectric effect simply.<|im_end|>
86
+ <|im_start|>assistant
87
+ ```
88
+
89
+ ### Reasoning mode
90
+
91
+ Reasoning mode can be activated with the chat template via the flag `thinking=True` or by using the following system prompt:
92
+
93
+ ```
94
+ You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.
95
+ ```
96
+
97
+ Note that you can add any additional system instructions before or after this system message, and it will adjust the model's policies, style, and effort of thinking, as well as its post-thinking style, format, identity, and more. You may also interleave the tool definition system message with the reasoning one.
98
+
99
+ When the model chooses to deliberate, it emits:
100
+
101
+ ```
102
+ <|im_start|>assistant
103
+ <think>
104
+ …model’s internal reasoning may appear here…
105
+ </think>
106
+ Final response starts here…<|im_end|>
107
+ ```
108
+
109
+ Additionally, we provide a chat-template flag that keeps the content in between the `<think> ... </think>` tags of earlier assistant turns; you can enable it by setting `keep_cots=True`.
110
+
111
+
112
+ ## Function Calling & Tool Use
113
+
114
+ Hermes 4 supports function/tool calls *within* a single assistant turn, produced after its reasoning:
115
+
116
+ **System message (example):**
117
+
118
+ ```
119
+ <|im_start|>system
120
+ You are a function-calling AI. Tools are provided inside <tools>…</tools>.
121
+ When appropriate, call a tool by emitting a <tool_call>{...}</tool_call> object.
122
+ After a tool responds (as <tool_response>), continue reasoning inside <think> and produce the final answer.
123
+ <tools>
124
+ {"type":"function","function":{"name":"get_weather","description":"Get weather by city","parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}}}
125
+ </tools><|im_end|>
126
+ ```
127
+
128
+ Note that you may also simply place tool definitions into the "tools:" field of your messages, and the chat template will parse and create the system prompt for you. This also works with reasoning mode for improved accuracy of tool use.
129
+
130
+ The model will then generate tool calls within `<tool_call> {tool_call} </tool_call>` tags, for easy parsing. The tool_call tags are also added tokens, which makes them easy to parse while streaming! There are also automatic tool parsers built in to vLLM and SGLang for Hermes: just set the tool parser in vLLM to `hermes` and in SGLang to `qwen25`.
131
+
132
+ ## Inference Notes
133
+
134
+ - **Sampling defaults that work well:** `temperature=0.6, top_p=0.95, top_k=20`.
135
+ - **Template:** Use the ChatML chat format for Hermes 4 14B as shown above, or set `add_generation_prompt=True` when using `tokenizer.apply_chat_template(...)`.
136
+
137
+ ### Transformers example
138
+
139
+ ```python
140
+ from transformers import AutoTokenizer, AutoModelForCausalLM
141
+ import torch
142
+
143
+ model_id = "NousResearch/Hermes-4-14B"
144
+
145
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
146
+ model = AutoModelForCausalLM.from_pretrained(
147
+ model_id,
148
+ torch_dtype=torch.bfloat16,
149
+ device_map="auto"
150
+ )
151
+
152
+ messages = [
153
+ {"role":"system","content":"You are Hermes 4. Be concise."},
154
+ {"role":"user","content":"Summarize CRISPR in 3 sentences."}
155
+ ]
156
+
157
+ inputs = tokenizer.apply_chat_template(
158
+ messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
159
+ ).to(model.device)
160
+
161
+ outputs = model.generate(
162
+ **inputs, max_new_tokens=400, temperature=0.6, top_p=0.95, top_k=20, do_sample=True
163
+ )
164
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
165
+ ```
166
+
167
+ For production serving on multi-GPU nodes, consider tensor parallel inference engines (e.g., SGLang/vLLM backends) with prefix caching.
168
+
169
+ ## Inference Providers:
170
+
171
+ ### Nous Portal:
172
+
173
+ <a href="https://portal.nousresearch.com"><img width=256 alt="nous portal logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/6YytY7N0mjCnBQvWo3qtv.png"></a>
174
+
175
+ ### Chutes:
176
+
177
+ <a href="https://chutes.ai/app"><img width=256 alt="chutes logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/l14AWPv6cSvaprpwK_IWY.png"></a>
178
+
179
+ ### Nebius:
180
+
181
+ <a href="https://nebius.com/services/studio-inference-service">
182
+ <picture>
183
+ <source media="(prefers-color-scheme: dark)" srcset="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vhL0oAomFa_awBdt2KF_x.png">
184
+ <source media="(prefers-color-scheme: light)" srcset="https://cdn-uploads.huggingface.co/production/uploads/64b21cbb2fc8324fcb1dac03/LjAfeFfAz8ac5rV-iiwj5.png">
185
+ <img width=256 alt="nebius.com logo" src="https://cdn-uploads.huggingface.co/production/uploads/64b21cbb2fc8324fcb1dac03/LjAfeFfAz8ac5rV-iiwj5.png">
186
+ </picture>
187
+ </a>
188
+
189
+ ### Luminal:
190
+
191
+ <a href="https://luminalai.com/">
192
+ <img width=256 alt="luminal logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/FIHsRdjMMP0HUjebiuJyH.png">
193
+ </a>
194
+
195
+ # Quantized / Smaller Variants
196
+
197
+ Hermes 4 is available as BF16 original weights, as well as FP8 variants and GGUF variants by LM Studio.
198
+
199
+ FP8: https://huggingface.co/NousResearch/Hermes-4-14B-FP8
200
+
201
+ GGUF (Courtesy of LM Studio team!):
202
+
203
+ Hermes 4 is also available in larger sizes (e.g., 70B, 405B) with similar prompt formats.
204
+
205
+ # How to cite
206
+
207
+ ```bibtex
208
+ @misc{teknium2025hermes4technicalreport,
209
+ title={Hermes 4 Technical Report},
210
+ author={Ryan Teknium and Roger Jin and Jai Suphavadeeprasit and Dakota Mahan and Jeffrey Quesnelle and Joe Li and Chen Guang and Shannon Sands and Karan Malhotra},
211
+ year={2025},
212
+ eprint={2508.18255},
213
+ archivePrefix={arXiv},
214
+ primaryClass={cs.AI},
215
+ url={https://arxiv.org/abs/2508.18255},
216
+ }
217
+ ```
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
chat_template.jinja ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Hermes 4 ChatML chat template. Inputs: messages, optional tools, thinking (default false), keep_cots (default false), add_generation_prompt. -#}{%- set thinking_prompt = 'You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.' %}
2
+ {%- set standard_prompt = 'You are Hermes, created by Nous Research.' %}
3
+ {%- if not thinking is defined %}{% set thinking = false %}{% endif %}
4
+ {%- if not keep_cots is defined %}{% set keep_cots = false %}{% endif %}
5
+ {#- Default system prompt; only used when the first message is not a system message. -#}{%- if thinking %}{%- set system_prompt = thinking_prompt %}{%- else %}{%- set system_prompt = standard_prompt %}{%- endif %}
6
+ {#- With tools: one system turn holding the system text followed by the tool signatures and the expected call format. -#}{%- if tools %}
7
+ {{- '<|im_start|>system\n' }}
8
+ {%- if messages[0]['role'] == 'system' %}
9
+ {{- messages[0]['content'] }}
10
+ {%- else %}
11
+ {{- system_prompt }}
12
+ {%- endif %}
13
+ {{- "\n\n# Tools\n\nYou are a function calling AI model. You may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
14
+ {%- for tool in tools %}
15
+ {{- "\n" }}
16
+ {{- tool | tojson }}
17
+ {%- endfor %}
18
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": \"<function-name>\", \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
19
+ {%- else %}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
22
+ {%- else %}
23
+ {{- '<|im_start|>system\n' + system_prompt + '<|im_end|>\n' }}
24
+ {%- endif %}
25
+ {%- endif %}
26
+ {#- Render each turn; a leading system message was already emitted above, so it is skipped here. -#}{%- for message in messages %}
27
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
28
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
29
+ {%- elif (message.role == "assistant" and not message.tool_calls) %}
30
+ {{- '<|im_start|>' + message.role }}
31
+ {%- if message.content %}
32
+ {%- set content = message['content'] -%}
33
+ {#- In thinking mode without keep_cots, earlier reasoning is replaced by an empty <think> </think> stub. -#}{%- if thinking %}
34
+ {%- if not keep_cots %}
35
+ {#- [-1] keeps the whole message when it contains no closing think tag instead of failing on a missing index. -#}{%- set content = '<think> </think>' + content.split('</think>', 1)[-1] -%}
36
+ {%- endif %}
37
+ {%- endif %}
38
+ {{- '\n' + content + '<|im_end|>' + '\n' }}
39
+ {%- endif %}
40
+ {%- elif message.role == "assistant" %}
41
+ {{- '<|im_start|>' + message.role }}
42
+ {%- if message.content %}
43
+ {%- set content = message['content'] -%}
44
+ {%- if thinking %}
45
+ {%- if not keep_cots %}
46
+ {#- Same reasoning-stripping rule as for assistant turns without tool calls. -#}{%- set content = '<think> </think>' + content.split('</think>', 1)[-1] -%}
47
+ {%- endif %}
48
+ {%- endif %}
49
+ {{- '\n' + content }}
50
+ {%- endif %}
51
+ {#- A tool call may be wrapped in a function attribute or given flat with name and arguments. -#}{%- for tool_call in message.tool_calls %}
52
+ {%- if tool_call.function is defined %}
53
+ {%- set tool_call = tool_call.function %}
54
+ {%- endif %}
55
+ {{- '\n<tool_call>\n{"name": "' }}
56
+ {{- tool_call.name }}
57
+ {{- '", "arguments": ' }}
58
+ {{- tool_call.arguments | tojson }}
59
+ {{- '}\n</tool_call>' }}
60
+ {%- endfor %}
61
+ {{- '<|im_end|>\n' }}
62
+ {#- Consecutive tool messages share one user turn; each result is wrapped in tool_response tags. -#}{%- elif message.role == "tool" %}
63
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
64
+ {{- '<|im_start|>user' }}
65
+ {%- endif %}
66
+ {{- '\n<tool_response>\n' }}
67
+ {{- message.content }}
68
+ {{- '\n</tool_response>' }}
69
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
70
+ {{- '<|im_end|>\n' }}
71
+ {%- endif %}
72
+ {%- endif %}
73
+ {%- endfor %}
74
+ {#- Open an assistant turn for the model to complete. -#}{%- if add_generation_prompt %}
75
+ {{- '<|im_start|>assistant\n' }}
76
+ {%- endif %}
77
+
config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "head_dim": 128,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 5120,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 17408,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention"
54
+ ],
55
+ "max_position_embeddings": 40960,
56
+ "max_window_layers": 40,
57
+ "model_type": "qwen3",
58
+ "num_attention_heads": 40,
59
+ "num_hidden_layers": 40,
60
+ "num_key_value_heads": 8,
61
+ "rms_norm_eps": 1e-06,
62
+ "rope_scaling": null,
63
+ "rope_theta": 1000000,
64
+ "sliding_window": null,
65
+ "tie_word_embeddings": false,
66
+ "torch_dtype": "bfloat16",
67
+ "transformers_version": "4.54.0",
68
+ "use_cache": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": 151643,
5
+ "max_new_tokens": 2048,
6
+ "transformers_version": "4.54.0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf7a195adb918e2a33ed6affca15ec5ebf846b6be3594e45baf4874af753cc6e
3
+ size 4984780784
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1314004448227043fc7c43c4f7849d0ef1be36c60c15e4132456aca94f2fae85
3
+ size 4980892048
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2cb1a439032fbec0f115cba6b3752780d469d1aeb38c201e4ced654cd2a3b6a
3
+ size 4928485104
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2087c0742d1d4d05843bb59b0b0c0e4bd8d5c22afc2995fa7cdd68bfccc21412
3
+ size 4980892112
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f97b8a90a17670f7711905ae0f54527247636527a5732d2c13e88187d8f423e6
3
+ size 4928485104
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71dc04e1d6830171022d7e4ee616bbdff8c5d8e6ec2c6628edc7a9966111330a
3
+ size 4733130504
model.safetensors.index.json ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 424960,
4
+ "total_size": 29536614400
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00006-of-00006.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
14
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
15
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
17
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
25
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
28
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
30
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
31
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
32
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
33
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
34
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
35
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
36
+ "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
37
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
38
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
39
+ "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
40
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
41
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
42
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
43
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
44
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
45
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
46
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
47
+ "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
48
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
49
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
50
+ "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
51
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
52
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
53
+ "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors",
54
+ "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
55
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
56
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
57
+ "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
58
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
59
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
60
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
61
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
62
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
63
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
64
+ "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
65
+ "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
66
+ "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
67
+ "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
68
+ "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
69
+ "model.layers.13.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
70
+ "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
71
+ "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
72
+ "model.layers.13.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
73
+ "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
74
+ "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
75
+ "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
76
+ "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
77
+ "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
78
+ "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
79
+ "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
80
+ "model.layers.14.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
81
+ "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
82
+ "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
83
+ "model.layers.14.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
84
+ "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
85
+ "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
86
+ "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
87
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
88
+ "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
89
+ "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
90
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
91
+ "model.layers.15.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
92
+ "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
93
+ "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
94
+ "model.layers.15.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
95
+ "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
96
+ "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
97
+ "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
98
+ "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
99
+ "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
100
+ "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
101
+ "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
102
+ "model.layers.16.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
103
+ "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
104
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
105
+ "model.layers.16.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
106
+ "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
107
+ "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
108
+ "model.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors",
109
+ "model.layers.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
110
+ "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
111
+ "model.layers.17.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
112
+ "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
113
+ "model.layers.17.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
114
+ "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
115
+ "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
116
+ "model.layers.17.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
117
+ "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
118
+ "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
119
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00006.safetensors",
120
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
121
+ "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
122
+ "model.layers.18.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
123
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
124
+ "model.layers.18.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
125
+ "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
126
+ "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
127
+ "model.layers.18.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
128
+ "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
129
+ "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
130
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
131
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
132
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
133
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
134
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
135
+ "model.layers.19.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
136
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
137
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
138
+ "model.layers.19.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
139
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
140
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
141
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
142
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
143
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
144
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
145
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
146
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
147
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
148
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
149
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
150
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
151
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
152
+ "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
153
+ "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
154
+ "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
155
+ "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
156
+ "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
157
+ "model.layers.20.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
158
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
159
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
160
+ "model.layers.20.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
161
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
162
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
163
+ "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
164
+ "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
165
+ "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
166
+ "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
167
+ "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
168
+ "model.layers.21.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
169
+ "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
170
+ "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
171
+ "model.layers.21.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
172
+ "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
173
+ "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
174
+ "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
175
+ "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
176
+ "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
177
+ "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
178
+ "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
179
+ "model.layers.22.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
180
+ "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
181
+ "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
182
+ "model.layers.22.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
183
+ "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
184
+ "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
185
+ "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors",
186
+ "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
187
+ "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
188
+ "model.layers.23.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
189
+ "model.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
190
+ "model.layers.23.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
191
+ "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
192
+ "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
193
+ "model.layers.23.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
194
+ "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
195
+ "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
196
+ "model.layers.24.input_layernorm.weight": "model-00004-of-00006.safetensors",
197
+ "model.layers.24.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
198
+ "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
199
+ "model.layers.24.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
200
+ "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
201
+ "model.layers.24.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
202
+ "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
203
+ "model.layers.24.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
204
+ "model.layers.24.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
205
+ "model.layers.24.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
206
+ "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
207
+ "model.layers.25.input_layernorm.weight": "model-00004-of-00006.safetensors",
208
+ "model.layers.25.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
209
+ "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
210
+ "model.layers.25.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
211
+ "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
212
+ "model.layers.25.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
213
+ "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
214
+ "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
215
+ "model.layers.25.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
216
+ "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
217
+ "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
218
+ "model.layers.26.input_layernorm.weight": "model-00004-of-00006.safetensors",
219
+ "model.layers.26.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
220
+ "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
221
+ "model.layers.26.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
222
+ "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
223
+ "model.layers.26.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
224
+ "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
225
+ "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
226
+ "model.layers.26.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
227
+ "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
228
+ "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
229
+ "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
230
+ "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
231
+ "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
232
+ "model.layers.27.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
233
+ "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
234
+ "model.layers.27.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
235
+ "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
236
+ "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
237
+ "model.layers.27.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
238
+ "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
239
+ "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
240
+ "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
241
+ "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
242
+ "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
243
+ "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
244
+ "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
245
+ "model.layers.28.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
246
+ "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
247
+ "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
248
+ "model.layers.28.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
249
+ "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
250
+ "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
251
+ "model.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors",
252
+ "model.layers.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
253
+ "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
254
+ "model.layers.29.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
255
+ "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
256
+ "model.layers.29.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
257
+ "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
258
+ "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
259
+ "model.layers.29.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
260
+ "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
261
+ "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
262
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
263
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
264
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
265
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
266
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
267
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
268
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
269
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
270
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
271
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
272
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
273
+ "model.layers.30.input_layernorm.weight": "model-00005-of-00006.safetensors",
274
+ "model.layers.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
275
+ "model.layers.30.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
276
+ "model.layers.30.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
277
+ "model.layers.30.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
278
+ "model.layers.30.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
279
+ "model.layers.30.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
280
+ "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
281
+ "model.layers.30.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
282
+ "model.layers.30.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
283
+ "model.layers.30.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
284
+ "model.layers.31.input_layernorm.weight": "model-00005-of-00006.safetensors",
285
+ "model.layers.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
286
+ "model.layers.31.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
287
+ "model.layers.31.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
288
+ "model.layers.31.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
289
+ "model.layers.31.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
290
+ "model.layers.31.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
291
+ "model.layers.31.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
292
+ "model.layers.31.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
293
+ "model.layers.31.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
294
+ "model.layers.31.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
295
+ "model.layers.32.input_layernorm.weight": "model-00005-of-00006.safetensors",
296
+ "model.layers.32.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
297
+ "model.layers.32.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
298
+ "model.layers.32.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
299
+ "model.layers.32.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
300
+ "model.layers.32.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
301
+ "model.layers.32.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
302
+ "model.layers.32.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
303
+ "model.layers.32.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
304
+ "model.layers.32.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
305
+ "model.layers.32.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
306
+ "model.layers.33.input_layernorm.weight": "model-00005-of-00006.safetensors",
307
+ "model.layers.33.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
308
+ "model.layers.33.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
309
+ "model.layers.33.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
310
+ "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
311
+ "model.layers.33.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
312
+ "model.layers.33.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
313
+ "model.layers.33.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
314
+ "model.layers.33.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
315
+ "model.layers.33.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
316
+ "model.layers.33.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
317
+ "model.layers.34.input_layernorm.weight": "model-00005-of-00006.safetensors",
318
+ "model.layers.34.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
319
+ "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
320
+ "model.layers.34.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
321
+ "model.layers.34.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
322
+ "model.layers.34.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
323
+ "model.layers.34.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
324
+ "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
325
+ "model.layers.34.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
326
+ "model.layers.34.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
327
+ "model.layers.34.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
328
+ "model.layers.35.input_layernorm.weight": "model-00006-of-00006.safetensors",
329
+ "model.layers.35.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
330
+ "model.layers.35.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
331
+ "model.layers.35.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
332
+ "model.layers.35.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
333
+ "model.layers.35.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
334
+ "model.layers.35.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
335
+ "model.layers.35.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
336
+ "model.layers.35.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
337
+ "model.layers.35.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
338
+ "model.layers.35.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
339
+ "model.layers.36.input_layernorm.weight": "model-00006-of-00006.safetensors",
340
+ "model.layers.36.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
341
+ "model.layers.36.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
342
+ "model.layers.36.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
343
+ "model.layers.36.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
344
+ "model.layers.36.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
345
+ "model.layers.36.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
346
+ "model.layers.36.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
347
+ "model.layers.36.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
348
+ "model.layers.36.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
349
+ "model.layers.36.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
350
+ "model.layers.37.input_layernorm.weight": "model-00006-of-00006.safetensors",
351
+ "model.layers.37.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
352
+ "model.layers.37.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
353
+ "model.layers.37.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
354
+ "model.layers.37.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
355
+ "model.layers.37.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
356
+ "model.layers.37.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
357
+ "model.layers.37.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
358
+ "model.layers.37.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
359
+ "model.layers.37.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
360
+ "model.layers.37.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
361
+ "model.layers.38.input_layernorm.weight": "model-00006-of-00006.safetensors",
362
+ "model.layers.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
363
+ "model.layers.38.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
364
+ "model.layers.38.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
365
+ "model.layers.38.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
366
+ "model.layers.38.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
367
+ "model.layers.38.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
368
+ "model.layers.38.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
369
+ "model.layers.38.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
370
+ "model.layers.38.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
371
+ "model.layers.38.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
372
+ "model.layers.39.input_layernorm.weight": "model-00006-of-00006.safetensors",
373
+ "model.layers.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
374
+ "model.layers.39.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
375
+ "model.layers.39.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
376
+ "model.layers.39.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
377
+ "model.layers.39.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
378
+ "model.layers.39.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
379
+ "model.layers.39.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
380
+ "model.layers.39.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
381
+ "model.layers.39.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
382
+ "model.layers.39.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
383
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
384
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
385
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
386
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
387
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
388
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
389
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
390
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
391
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
392
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
393
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
394
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors",
395
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
396
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
397
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
398
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
399
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00006.safetensors",
400
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
401
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
402
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00006.safetensors",
403
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
404
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
405
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
406
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
407
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
408
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
409
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
410
+ "model.layers.6.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
411
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
412
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
413
+ "model.layers.6.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
414
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
415
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
416
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
417
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
418
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
419
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
420
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
421
+ "model.layers.7.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
422
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
423
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
424
+ "model.layers.7.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
425
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
426
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
427
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
428
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
429
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
430
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
431
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
432
+ "model.layers.8.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
433
+ "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
434
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
435
+ "model.layers.8.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
436
+ "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
437
+ "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
438
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
439
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
440
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
441
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
442
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
443
+ "model.layers.9.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
444
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
445
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
446
+ "model.layers.9.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
447
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
448
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
449
+ "model.norm.weight": "model-00006-of-00006.safetensors"
450
+ }
451
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff