Bopalv committed on
Commit
0c2365d
·
verified ·
1 Parent(s): 4f8913f

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Qwen3-0.6B-GGUF/Qwen3-0.6B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Qwen3-0.6B-GGUF/Qwen3-0.6B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
37
+ Qwen3-0.6B-GPTQ-Int4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Qwen3-0.6B-GPTQ-Int4/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
Qwen3-0.6B-GPTQ-Int4/README.md ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ license_link: https://huggingface.co/Qwen/Qwen3-0.6B/blob/main/LICENSE
5
+ pipeline_tag: text-generation
6
+ tags:
7
+ - Qwen3
8
+ - GPTQ
9
+ - Int4
10
+ - 量化修复
11
+ - vLLM
12
+ base_model:
13
+ - Qwen/Qwen3-0.6B
14
+ base_model_relation: quantized
15
+ ---
16
+ # Qwen3-0.6B-GPTQ-Int4
17
+ Base model: [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B)
18
+
19
+ <i>This model is quantized to 4-bit with a group size of 128.</i>
20
+ <br>
21
+ <i>Compared to earlier quantized versions, the new quantized model demonstrates better tokens/s efficiency. This improvement comes from setting desc_act=False in the quantization configuration.</i>
22
+
23
+ ```
24
+ vllm serve JunHowie/Qwen3-0.6B-GPTQ-Int4
25
+ ```
26
+
27
+ ### 【Dependencies】
28
+ ```
29
+ vllm>=0.9.2
30
+ ```
31
+
32
+ ### 【Model Download】
33
+
34
+ ```python
35
+ from huggingface_hub import snapshot_download
36
+ snapshot_download('JunHowie/Qwen3-0.6B-GPTQ-Int4', cache_dir="your_local_path")
37
+ ```
38
+
39
+ ### 【Overview】
40
+ # Qwen3-0.6B
41
+ <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
42
+ <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
43
+ </a>
44
+
45
+ ## Qwen3 Highlights
46
+
47
+ Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features:
48
+
49
+ - **Unique support for seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within a single model**, ensuring optimal performance across various scenarios.
50
+ - **Significant enhancement of its reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
51
+ - **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.
52
+ - **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and non-thinking modes and achieving leading performance among open-source models in complex agent-based tasks.
53
+ - **Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction following** and **translation**.
54
+
55
+ ## Model Overview
56
+
57
+ **Qwen3-0.6B** has the following features:
58
+ - Type: Causal Language Models
59
+ - Training Stage: Pretraining & Post-training
60
+ - Number of Parameters: 0.6B
61
+ - Number of Parameters (Non-Embedding): 0.44B
62
+ - Number of Layers: 28
63
+ - Number of Attention Heads (GQA): 16 for Q and 8 for KV
64
+ - Context Length: 32,768
65
+
66
+ For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).
67
+
68
+ > [!TIP]
69
+ > If you encounter significant endless repetitions, please refer to the [Best Practices](#best-practices) section for optimal sampling parameters, and set the ``presence_penalty`` to 1.5.
70
+
71
+ ## Quickstart
72
+
73
+ The code of Qwen3 has been included in the latest Hugging Face `transformers`, and we advise you to use the latest version of `transformers`.
74
+
75
+ With `transformers<4.51.0`, you will encounter the following error:
76
+ ```
77
+ KeyError: 'qwen3'
78
+ ```
79
+
80
+ The following contains a code snippet illustrating how to use the model to generate content based on given inputs.
81
+ ```python
82
+ from transformers import AutoModelForCausalLM, AutoTokenizer
83
+
84
+ model_name = "Qwen/Qwen3-0.6B"
85
+
86
+ # load the tokenizer and the model
87
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
88
+ model = AutoModelForCausalLM.from_pretrained(
89
+ model_name,
90
+ torch_dtype="auto",
91
+ device_map="auto"
92
+ )
93
+
94
+ # prepare the model input
95
+ prompt = "Give me a short introduction to large language model."
96
+ messages = [
97
+ {"role": "user", "content": prompt}
98
+ ]
99
+ text = tokenizer.apply_chat_template(
100
+ messages,
101
+ tokenize=False,
102
+ add_generation_prompt=True,
103
+ enable_thinking=True # Switches between thinking and non-thinking modes. Default is True.
104
+ )
105
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
106
+
107
+ # conduct text completion
108
+ generated_ids = model.generate(
109
+ **model_inputs,
110
+ max_new_tokens=32768
111
+ )
112
+ output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
113
+
114
+ # parsing thinking content
115
+ try:
116
+ # rindex finding 151668 (</think>)
117
+ index = len(output_ids) - output_ids[::-1].index(151668)
118
+ except ValueError:
119
+ index = 0
120
+
121
+ thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
122
+ content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
123
+
124
+ print("thinking content:", thinking_content)
125
+ print("content:", content)
126
+ ```
127
+
128
+ For deployment, you can use `sglang>=0.4.6.post1` or `vllm>=0.8.5` to create an OpenAI-compatible API endpoint:
129
+ - SGLang:
130
+ ```shell
131
+ python -m sglang.launch_server --model-path Qwen/Qwen3-0.6B --reasoning-parser qwen3
132
+ ```
133
+ - vLLM:
134
+ ```shell
135
+ vllm serve Qwen/Qwen3-0.6B --enable-reasoning --reasoning-parser deepseek_r1
136
+ ```
137
+
138
+ For local use, applications such as Ollama, LMStudio, MLX-LM, llama.cpp, and KTransformers have also supported Qwen3.
139
+
140
+ ## Switching Between Thinking and Non-Thinking Mode
141
+
142
+ > [!TIP]
143
+ > The `enable_thinking` switch is also available in APIs created by SGLang and vLLM.
144
+ > Please refer to our documentation for [SGLang](https://qwen.readthedocs.io/en/latest/deployment/sglang.html#thinking-non-thinking-modes) and [vLLM](https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes) users.
145
+
146
+ ### `enable_thinking=True`
147
+
148
+ By default, Qwen3 has thinking capabilities enabled, similar to QwQ-32B. This means the model will use its reasoning abilities to enhance the quality of generated responses. For example, when explicitly setting `enable_thinking=True` or leaving it as the default value in `tokenizer.apply_chat_template`, the model will engage its thinking mode.
149
+
150
+ ```python
151
+ text = tokenizer.apply_chat_template(
152
+ messages,
153
+ tokenize=False,
154
+ add_generation_prompt=True,
155
+ enable_thinking=True # True is the default value for enable_thinking
156
+ )
157
+ ```
158
+
159
+ In this mode, the model will generate think content wrapped in a `<think>...</think>` block, followed by the final response.
160
+
161
+ > [!NOTE]
162
+ > For thinking mode, use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0` (the default setting in `generation_config.json`). **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
163
+
164
+
165
+ ### `enable_thinking=False`
166
+
167
+ We provide a hard switch to strictly disable the model's thinking behavior, aligning its functionality with the previous Qwen2.5-Instruct models. This mode is particularly useful in scenarios where disabling thinking is essential for enhancing efficiency.
168
+
169
+ ```python
170
+ text = tokenizer.apply_chat_template(
171
+ messages,
172
+ tokenize=False,
173
+ add_generation_prompt=True,
174
+ enable_thinking=False # Setting enable_thinking=False disables thinking mode
175
+ )
176
+ ```
177
+
178
+ In this mode, the model will not generate any think content and will not include a `<think>...</think>` block.
179
+
180
+ > [!NOTE]
181
+ > For non-thinking mode, we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
182
+
183
+ ### Advanced Usage: Switching Between Thinking and Non-Thinking Modes via User Input
184
+
185
+ We provide a soft switch mechanism that allows users to dynamically control the model's behavior when `enable_thinking=True`. Specifically, you can add `/think` and `/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations.
186
+
187
+ Here is an example of a multi-turn conversation:
188
+
189
+ ```python
190
+ from transformers import AutoModelForCausalLM, AutoTokenizer
191
+
192
+ class QwenChatbot:
193
+ def __init__(self, model_name="Qwen/Qwen3-0.6B"):
194
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
195
+ self.model = AutoModelForCausalLM.from_pretrained(model_name)
196
+ self.history = []
197
+
198
+ def generate_response(self, user_input):
199
+ messages = self.history + [{"role": "user", "content": user_input}]
200
+
201
+ text = self.tokenizer.apply_chat_template(
202
+ messages,
203
+ tokenize=False,
204
+ add_generation_prompt=True
205
+ )
206
+
207
+ inputs = self.tokenizer(text, return_tensors="pt")
208
+ response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()
209
+ response = self.tokenizer.decode(response_ids, skip_special_tokens=True)
210
+
211
+ # Update history
212
+ self.history.append({"role": "user", "content": user_input})
213
+ self.history.append({"role": "assistant", "content": response})
214
+
215
+ return response
216
+
217
+ # Example Usage
218
+ if __name__ == "__main__":
219
+ chatbot = QwenChatbot()
220
+
221
+ # First input (without /think or /no_think tags, thinking mode is enabled by default)
222
+ user_input_1 = "How many r's in strawberries?"
223
+ print(f"User: {user_input_1}")
224
+ response_1 = chatbot.generate_response(user_input_1)
225
+ print(f"Bot: {response_1}")
226
+ print("----------------------")
227
+
228
+ # Second input with /no_think
229
+ user_input_2 = "Then, how many r's in blueberries? /no_think"
230
+ print(f"User: {user_input_2}")
231
+ response_2 = chatbot.generate_response(user_input_2)
232
+ print(f"Bot: {response_2}")
233
+ print("----------------------")
234
+
235
+ # Third input with /think
236
+ user_input_3 = "Really? /think"
237
+ print(f"User: {user_input_3}")
238
+ response_3 = chatbot.generate_response(user_input_3)
239
+ print(f"Bot: {response_3}")
240
+ ```
241
+
242
+ > [!NOTE]
243
+ > For API compatibility, when `enable_thinking=True`, regardless of whether the user uses `/think` or `/no_think`, the model will always output a block wrapped in `<think>...</think>`. However, the content inside this block may be empty if thinking is disabled.
244
+ > When `enable_thinking=False`, the soft switches are not valid. Regardless of any `/think` or `/no_think` tags input by the user, the model will not generate think content and will not include a `<think>...</think>` block.
245
+
246
+ ## Agentic Use
247
+
248
+ Qwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
249
+
250
+ To define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.
251
+ ```python
252
+ from qwen_agent.agents import Assistant
253
+
254
+ # Define LLM
255
+ llm_cfg = {
256
+ 'model': 'Qwen3-0.6B',
257
+
258
+ # Use the endpoint provided by Alibaba Model Studio:
259
+ # 'model_type': 'qwen_dashscope',
260
+ # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
261
+
262
+ # Use a custom endpoint compatible with OpenAI API:
263
+ 'model_server': 'http://localhost:8000/v1', # api_base
264
+ 'api_key': 'EMPTY',
265
+
266
+ # Other parameters:
267
+ # 'generate_cfg': {
268
+ # # Add: When the response content is `<think>this is the thought</think>this is the answer;
269
+ # # Do not add: When the response has been separated by reasoning_content and content.
270
+ # 'thought_in_content': True,
271
+ # },
272
+ }
273
+
274
+ # Define Tools
275
+ tools = [
276
+ {'mcpServers': { # You can specify the MCP configuration file
277
+ 'time': {
278
+ 'command': 'uvx',
279
+ 'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
280
+ },
281
+ "fetch": {
282
+ "command": "uvx",
283
+ "args": ["mcp-server-fetch"]
284
+ }
285
+ }
286
+ },
287
+ 'code_interpreter', # Built-in tools
288
+ ]
289
+
290
+ # Define Agent
291
+ bot = Assistant(llm=llm_cfg, function_list=tools)
292
+
293
+ # Streaming generation
294
+ messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
295
+ for responses in bot.run(messages=messages):
296
+ pass
297
+ print(responses)
298
+ ```
299
+
300
+ ## Best Practices
301
+
302
+ To achieve optimal performance, we recommend the following settings:
303
+
304
+ 1. **Sampling Parameters**:
305
+ - For thinking mode (`enable_thinking=True`), use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`. **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions.
306
+ - For non-thinking mode (`enable_thinking=False`), we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`.
307
+ - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.
308
+
309
+ 2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 38,912 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.
310
+
311
+ 3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.
312
+ - **Math Problems**: Include "Please reason step by step, and put your final answer within \boxed{}." in the prompt.
313
+ - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: "Please show your choice in the `answer` field with only the choice letter, e.g., `"answer": "C"`."
314
+
315
+ 4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.
316
+
317
+ ### Citation
318
+
319
+ If you find our work helpful, feel free to cite us.
320
+
321
+ ```
322
+ @misc{qwen3technicalreport,
323
+ title={Qwen3 Technical Report},
324
+ author={Qwen Team},
325
+ year={2025},
326
+ eprint={2505.09388},
327
+ archivePrefix={arXiv},
328
+ primaryClass={cs.CL},
329
+ url={https://arxiv.org/abs/2505.09388},
330
+ }
331
+ ```
Qwen3-0.6B-GPTQ-Int4/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
Qwen3-0.6B-GPTQ-Int4/chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
Qwen3-0.6B-GPTQ-Int4/config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention"
43
+ ],
44
+ "max_position_embeddings": 40960,
45
+ "max_window_layers": 28,
46
+ "model_type": "qwen3",
47
+ "num_attention_heads": 16,
48
+ "num_hidden_layers": 28,
49
+ "num_key_value_heads": 8,
50
+ "quantization_config": {
51
+ "bits": 4,
52
+ "checkpoint_format": "gptq",
53
+ "desc_act": false,
54
+ "group_size": 128,
55
+ "hyb_act": false,
56
+ "lm_head": false,
57
+ "meta": {
58
+ "damp_auto_increment": 0.01,
59
+ "damp_percent": 0.05,
60
+ "mse": 0.0,
61
+ "quantizer": [
62
+ "gptqmodel:4.0.0"
63
+ ],
64
+ "static_groups": false,
65
+ "true_sequential": true,
66
+ "uri": "https://github.com/modelcloud/gptqmodel",
67
+ "v2": false,
68
+ "v2_alpha": 0.25
69
+ },
70
+ "pack_dtype": "int32",
71
+ "quant_method": "gptq",
72
+ "sym": true
73
+ },
74
+ "rms_norm_eps": 1e-06,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000,
77
+ "sliding_window": null,
78
+ "tie_word_embeddings": true,
79
+ "torch_dtype": "bfloat16",
80
+ "transformers_version": "4.55.4",
81
+ "use_cache": true,
82
+ "use_sliding_window": false,
83
+ "vocab_size": 151936
84
+ }
Qwen3-0.6B-GPTQ-Int4/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.55.4"
13
+ }
Qwen3-0.6B-GPTQ-Int4/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3-0.6B-GPTQ-Int4/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5731564803785c3d04797f99fd98f93937169de47996c8e7f915b1bed420a5c6
3
+ size 541343840
Qwen3-0.6B-GPTQ-Int4/quant_log.csv ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,samples,damp,time
2
+ 0,self_attn.k_proj,0.09850946,0.01000,0.851
3
+ 0,self_attn.v_proj,0.07586420,0.01000,0.408
4
+ 0,self_attn.q_proj,0.22626629,0.01000,0.427
5
+ 0,self_attn.o_proj,0.06250948,0.01000,0.889
6
+ 0,mlp.up_proj,1.20686150,0.01000,0.456
7
+ 0,mlp.gate_proj,2.40769720,0.01000,0.444
8
+ 0,mlp.down_proj,0.06865632,0.01000,1.604
9
+ 1,self_attn.k_proj,0.04139863,0.01000,0.474
10
+ 1,self_attn.v_proj,0.03884006,0.01000,0.423
11
+ 1,self_attn.q_proj,0.09340510,0.01000,0.514
12
+ 1,self_attn.o_proj,0.01172515,0.01000,0.988
13
+ 1,mlp.up_proj,4.21653891,0.01000,0.711
14
+ 1,mlp.gate_proj,14.37671852,0.01000,0.726
15
+ 1,mlp.down_proj,0.08684395,0.01000,2.226
16
+ 2,self_attn.k_proj,0.07648005,0.01000,0.703
17
+ 2,self_attn.v_proj,0.07283711,0.01000,0.739
18
+ 2,self_attn.q_proj,0.17951047,0.01000,0.719
19
+ 2,self_attn.o_proj,0.02008994,0.01000,1.110
20
+ 2,mlp.up_proj,3.53466392,0.01000,0.649
21
+ 2,mlp.gate_proj,9.86164284,0.01000,0.658
22
+ 2,mlp.down_proj,3.31883287,0.01000,1.182
23
+ 3,self_attn.k_proj,0.67591071,0.01000,0.422
24
+ 3,self_attn.v_proj,0.68356144,0.01000,0.390
25
+ 3,self_attn.q_proj,1.38537908,0.01000,0.403
26
+ 3,self_attn.o_proj,0.03747530,0.01000,0.813
27
+ 3,mlp.up_proj,4.85747910,0.01000,0.429
28
+ 3,mlp.gate_proj,12.60369778,0.01000,0.410
29
+ 3,mlp.down_proj,0.26924509,0.01000,-0.695
30
+ 4,self_attn.k_proj,0.60738301,0.01000,0.589
31
+ 4,self_attn.v_proj,0.62895143,0.01000,0.580
32
+ 4,self_attn.q_proj,1.22826421,0.01000,0.395
33
+ 4,self_attn.o_proj,0.08694792,0.01000,0.895
34
+ 4,mlp.up_proj,5.18607664,0.01000,0.424
35
+ 4,mlp.gate_proj,11.80922222,0.01000,0.417
36
+ 4,mlp.down_proj,0.33178610,0.01000,1.255
37
+ 5,self_attn.k_proj,0.91473466,0.01000,0.414
38
+ 5,self_attn.v_proj,0.97595525,0.01000,0.399
39
+ 5,self_attn.q_proj,2.19729996,0.01000,0.390
40
+ 5,self_attn.o_proj,0.10701506,0.01000,0.853
41
+ 5,mlp.up_proj,4.87147427,0.01000,0.447
42
+ 5,mlp.gate_proj,8.15231133,0.01000,0.414
43
+ 5,mlp.down_proj,0.37409323,0.01000,1.543
44
+ 6,self_attn.k_proj,0.72675020,0.01000,0.387
45
+ 6,self_attn.v_proj,0.66761994,0.01000,0.416
46
+ 6,self_attn.q_proj,1.59998417,0.01000,0.580
47
+ 6,self_attn.o_proj,0.09201059,0.01000,0.896
48
+ 6,mlp.up_proj,6.21852303,0.01000,0.444
49
+ 6,mlp.gate_proj,9.88837433,0.01000,0.398
50
+ 6,mlp.down_proj,0.49651879,0.01000,1.820
51
+ 7,self_attn.k_proj,1.26669955,0.01000,0.630
52
+ 7,self_attn.v_proj,1.42955840,0.01000,0.521
53
+ 7,self_attn.q_proj,3.06221533,0.01000,0.607
54
+ 7,self_attn.o_proj,0.13521528,0.01000,1.171
55
+ 7,mlp.up_proj,6.99258566,0.01000,0.594
56
+ 7,mlp.gate_proj,11.28202152,0.01000,0.617
57
+ 7,mlp.down_proj,0.60300756,0.01000,1.500
58
+ 8,self_attn.k_proj,1.67595625,0.01000,0.619
59
+ 8,self_attn.v_proj,1.60502338,0.01000,0.603
60
+ 8,self_attn.q_proj,3.77824259,0.01000,0.620
61
+ 8,self_attn.o_proj,0.14908858,0.01000,1.397
62
+ 8,mlp.up_proj,7.38887978,0.01000,0.699
63
+ 8,mlp.gate_proj,11.49653625,0.01000,0.727
64
+ 8,mlp.down_proj,0.69127196,0.01000,2.227
65
+ 9,self_attn.k_proj,2.98744106,0.01000,0.424
66
+ 9,self_attn.v_proj,3.08891487,0.01000,0.411
67
+ 9,self_attn.q_proj,7.24598789,0.01000,0.376
68
+ 9,self_attn.o_proj,0.24744949,0.01000,-1.119
69
+ 9,mlp.up_proj,8.45534515,0.01000,0.373
70
+ 9,mlp.gate_proj,13.44919872,0.01000,0.372
71
+ 9,mlp.down_proj,0.99226213,0.01000,1.276
72
+ 10,self_attn.k_proj,2.64885235,0.01000,0.509
73
+ 10,self_attn.v_proj,2.75350857,0.01000,0.495
74
+ 10,self_attn.q_proj,6.37406826,0.01000,0.553
75
+ 10,self_attn.o_proj,0.24520609,0.01000,0.987
76
+ 10,mlp.up_proj,8.50265503,0.01000,0.465
77
+ 10,mlp.gate_proj,13.53136253,0.01000,0.459
78
+ 10,mlp.down_proj,1.33377266,0.01000,1.378
79
+ 11,self_attn.k_proj,4.98540545,0.01000,0.455
80
+ 11,self_attn.v_proj,4.54301262,0.01000,0.453
81
+ 11,self_attn.q_proj,12.94447422,0.01000,0.445
82
+ 11,self_attn.o_proj,0.57048726,0.01000,0.945
83
+ 11,mlp.up_proj,8.60743999,0.01000,0.486
84
+ 11,mlp.gate_proj,11.23224926,0.01000,0.478
85
+ 11,mlp.down_proj,1.66301322,0.01000,1.320
86
+ 12,self_attn.k_proj,4.47545052,0.01000,0.556
87
+ 12,self_attn.v_proj,4.59355307,0.01000,0.547
88
+ 12,self_attn.q_proj,11.99758339,0.01000,0.570
89
+ 12,self_attn.o_proj,0.24353495,0.01000,0.980
90
+ 12,mlp.up_proj,8.05024338,0.01000,0.470
91
+ 12,mlp.gate_proj,9.81564045,0.01000,0.467
92
+ 12,mlp.down_proj,1.52156746,0.01000,1.359
93
+ 13,self_attn.k_proj,4.28024578,0.01000,0.455
94
+ 13,self_attn.v_proj,5.05019331,0.01000,0.459
95
+ 13,self_attn.q_proj,12.57633018,0.01000,0.460
96
+ 13,self_attn.o_proj,0.35622856,0.01000,0.909
97
+ 13,mlp.up_proj,8.56893921,0.01000,0.460
98
+ 13,mlp.gate_proj,10.62047768,0.01000,0.542
99
+ 13,mlp.down_proj,1.53902888,0.01000,1.246
100
+ 14,self_attn.k_proj,5.75165176,0.01000,0.463
101
+ 14,self_attn.v_proj,6.18406296,0.01000,0.434
102
+ 14,self_attn.q_proj,15.81763935,0.01000,0.461
103
+ 14,self_attn.o_proj,0.35870335,0.01000,0.932
104
+ 14,mlp.up_proj,9.21350956,0.01000,0.457
105
+ 14,mlp.gate_proj,11.32985497,0.01000,0.433
106
+ 14,mlp.down_proj,2.05143332,0.01000,1.450
107
+ 15,self_attn.k_proj,10.10069084,0.01000,0.471
108
+ 15,self_attn.v_proj,12.60639000,0.01000,0.462
109
+ 15,self_attn.q_proj,31.56911850,0.01000,0.458
110
+ 15,self_attn.o_proj,0.54869986,0.01000,0.878
111
+ 15,mlp.up_proj,10.21630764,0.01000,0.463
112
+ 15,mlp.gate_proj,12.16793251,0.01000,0.475
113
+ 15,mlp.down_proj,2.27123475,0.01000,-0.618
114
+ 16,self_attn.k_proj,14.20170116,0.01000,0.449
115
+ 16,self_attn.v_proj,13.06950951,0.01000,0.456
116
+ 16,self_attn.q_proj,40.14150238,0.01000,0.458
117
+ 16,self_attn.o_proj,0.68756855,0.01000,1.101
118
+ 16,mlp.up_proj,11.80347443,0.01000,0.433
119
+ 16,mlp.gate_proj,13.10005951,0.01000,0.460
120
+ 16,mlp.down_proj,4.53475285,0.01000,1.299
121
+ 17,self_attn.k_proj,29.83925247,0.01000,0.438
122
+ 17,self_attn.v_proj,35.82016754,0.01000,0.441
123
+ 17,self_attn.q_proj,90.73204041,0.01000,0.450
124
+ 17,self_attn.o_proj,1.52864456,0.01000,0.871
125
+ 17,mlp.up_proj,16.85394669,0.01000,0.503
126
+ 17,mlp.gate_proj,19.09194946,0.01000,0.473
127
+ 17,mlp.down_proj,5.12950706,0.01000,1.328
128
+ 18,self_attn.k_proj,27.85187912,0.01000,0.491
129
+ 18,self_attn.v_proj,29.46336555,0.01000,0.440
130
+ 18,self_attn.q_proj,86.81141663,0.01000,0.500
131
+ 18,self_attn.o_proj,1.21956921,0.01000,-1.325
132
+ 18,mlp.up_proj,20.33577156,0.01000,0.450
133
+ 18,mlp.gate_proj,23.08765602,0.01000,0.506
134
+ 18,mlp.down_proj,8.18551445,0.01000,1.637
135
+ 19,self_attn.k_proj,47.84007645,0.01000,0.482
136
+ 19,self_attn.v_proj,59.33264923,0.01000,0.453
137
+ 19,self_attn.q_proj,156.72351074,0.01000,0.468
138
+ 19,self_attn.o_proj,2.17994380,0.01000,1.132
139
+ 19,mlp.up_proj,25.84054565,0.01000,0.429
140
+ 19,mlp.gate_proj,24.49751282,0.01000,0.434
141
+ 19,mlp.down_proj,16.08885384,0.01000,1.426
142
+ 20,self_attn.k_proj,66.53054047,0.01000,0.480
143
+ 20,self_attn.v_proj,79.11817932,0.01000,0.449
144
+ 20,self_attn.q_proj,194.38667297,0.01000,0.451
145
+ 20,self_attn.o_proj,2.74797773,0.01000,0.900
146
+ 20,mlp.up_proj,28.64339828,0.01000,0.483
147
+ 20,mlp.gate_proj,25.69481659,0.01000,0.461
148
+ 20,mlp.down_proj,19.23760605,0.01000,1.325
149
+ 21,self_attn.k_proj,113.03172302,0.01000,0.433
150
+ 21,self_attn.v_proj,146.76942444,0.01000,0.426
151
+ 21,self_attn.q_proj,324.40625000,0.01000,0.427
152
+ 21,self_attn.o_proj,5.76776075,0.01000,0.925
153
+ 21,mlp.up_proj,34.46822739,0.01000,0.407
154
+ 21,mlp.gate_proj,27.89905167,0.01000,0.447
155
+ 21,mlp.down_proj,26.42908859,0.01000,1.104
156
+ 22,self_attn.k_proj,125.87751770,0.01000,0.426
157
+ 22,self_attn.v_proj,180.92504883,0.01000,0.424
158
+ 22,self_attn.q_proj,331.65649414,0.01000,0.431
159
+ 22,self_attn.o_proj,4.63394165,0.01000,0.739
160
+ 22,mlp.up_proj,37.16413116,0.01000,0.366
161
+ 22,mlp.gate_proj,29.45936966,0.01000,0.397
162
+ 22,mlp.down_proj,28.59059715,0.01000,1.551
163
+ 23,self_attn.k_proj,171.20681763,0.01000,0.501
164
+ 23,self_attn.v_proj,195.14624023,0.01000,0.458
165
+ 23,self_attn.q_proj,383.41241455,0.01000,0.468
166
+ 23,self_attn.o_proj,7.37380075,0.01000,0.937
167
+ 23,mlp.up_proj,42.09543228,0.01000,0.470
168
+ 23,mlp.gate_proj,32.35834503,0.01000,0.468
169
+ 23,mlp.down_proj,29.46091080,0.01000,1.385
170
+ 24,self_attn.k_proj,291.67578125,0.01000,0.474
171
+ 24,self_attn.v_proj,316.21063232,0.01000,0.479
172
+ 24,self_attn.q_proj,754.08447266,0.01000,0.472
173
+ 24,self_attn.o_proj,7.86809063,0.01000,1.055
174
+ 24,mlp.up_proj,39.64069748,0.01000,0.484
175
+ 24,mlp.gate_proj,29.28566170,0.01000,0.590
176
+ 24,mlp.down_proj,30.14324951,0.01000,2.362
177
+ 25,self_attn.k_proj,349.52825928,0.01000,0.477
178
+ 25,self_attn.v_proj,526.72943115,0.01000,0.477
179
+ 25,self_attn.q_proj,925.39587402,0.01000,0.482
180
+ 25,self_attn.o_proj,10.81455994,0.01000,0.979
181
+ 25,mlp.up_proj,39.06259918,0.01000,0.487
182
+ 25,mlp.gate_proj,27.49106216,0.01000,0.495
183
+ 25,mlp.down_proj,37.65495300,0.01000,1.569
184
+ 26,self_attn.k_proj,347.12860107,0.01000,0.496
185
+ 26,self_attn.v_proj,485.27230835,0.01000,0.483
186
+ 26,self_attn.q_proj,980.94482422,0.01000,0.490
187
+ 26,self_attn.o_proj,29.47231483,0.01000,1.047
188
+ 26,mlp.up_proj,41.97579956,0.01000,0.491
189
+ 26,mlp.gate_proj,30.15745354,0.01000,0.496
190
+ 26,mlp.down_proj,57.37902069,0.01000,1.621
191
+ 27,self_attn.k_proj,254.03285217,0.01000,0.483
192
+ 27,self_attn.v_proj,308.76086426,0.01000,-1.707
193
+ 27,self_attn.q_proj,559.01251221,0.01000,0.476
194
+ 27,self_attn.o_proj,16.60070992,0.01000,1.032
195
+ 27,mlp.up_proj,74.08196259,0.01000,0.523
196
+ 27,mlp.gate_proj,68.86535645,0.01000,0.492
197
+ 27,mlp.down_proj,66.11711884,0.01000,1.535
Qwen3-0.6B-GPTQ-Int4/quantize_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "hyb_act": false,
6
+ "sym": true,
7
+ "lm_head": false,
8
+ "quant_method": "gptq",
9
+ "checkpoint_format": "gptq",
10
+ "pack_dtype": "int32",
11
+ "meta": {
12
+ "quantizer": [
13
+ "gptqmodel:4.0.0"
14
+ ],
15
+ "uri": "https://github.com/modelcloud/gptqmodel",
16
+ "damp_percent": 0.05,
17
+ "damp_auto_increment": 0.01,
18
+ "static_groups": false,
19
+ "true_sequential": true,
20
+ "mse": 0.0,
21
+ "v2": false,
22
+ "v2_alpha": 0.25
23
+ }
24
+ }
Qwen3-0.6B-GPTQ-Int4/special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<unk>"
25
+ }
Qwen3-0.6B-GPTQ-Int4/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
Qwen3-0.6B-GPTQ-Int4/tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<unk>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2TokenizerFast",
238
+ "unk_token": null,
239
+ "_commit_hash": null
240
+ }
Qwen3-0.6B-GPTQ-Int4/vocab.json ADDED
The diff for this file is too large to render. See raw diff