Upload folder using huggingface_hub

#1
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,209 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://huggingface.co/Qwen/Qwen3.6-27B/blob/main/LICENSE
4
+ base_model: Qwen/Qwen3.6-27B
5
+ base_model_relation: quantized
6
+ pipeline_tag: image-text-to-text
7
+ library_name: transformers
8
+ tags:
9
+ - qwen3_5
10
+ - autoround
11
+ - int4
12
+ - w4g128
13
+ - w4a16
14
+ - quantization
15
+ - vllm
16
+ - multimodal
17
+ - mtp
18
+ - speculative-decoding
19
+ - code
20
+ - coding
21
+ ---
22
+ # Qwen3.6-27B INT4 AutoRound — Code Calibrated (Best Recipe)
23
+
24
+ A **W4A16 (INT4 weight, FP16 activation) quantization** of [`Qwen/Qwen3.6-27B`](https://huggingface.co/Qwen/Qwen3.6-27B), produced with [Intel's AutoRound](https://github.com/intel/auto-round).
25
+
26
+ > **Key difference from the standard AutoRound quant:** This variant was calibrated on a **normalized and sampled subset of [nvidia/OpenCodeInstruct](https://huggingface.co/datasets/nvidia/OpenCodeInstruct)** — a ~5 M-sample, execution-verified coding dataset — instead of the default general-purpose pile corpus. Calibrating on domain-specific data guides AutoRound's weight-rounding optimization to minimize quantization error on the token distributions that matter most for code, which is intended to improve accuracy on code generation, reasoning, and instruction-following for programming tasks (no benchmark comparison is published in this card). The **`auto-round-best` preset** was used (1000 iterations, 512 calibration samples), which runs ~4–5× slower than the standard recipe but targets the highest accuracy among AutoRound's presets. MTP (speculative decoding) and image/vision inputs work out of the box with no post-processing required.
27
+
28
+ ## TL;DR
29
+
30
+ - **Base**: Qwen3.6-27B (27B dense VLM)
31
+ - **Quant**: INT4 W4A16, group_size 128, symmetric
32
+ - **Tool**: `auto-round-best` (1000 iters, 512 samples, torch.compile)
33
+ - **Calibration dataset**: `nvidia/OpenCodeInstruct` (coding domain)
34
+ - **Size**: ~18 GB (down from ~54 GB BF16) — **3× reduction**
35
+ - **MTP**: Native Multi-Token Prediction head preserved in BF16 — enables **native speculative decoding** in vLLM (~85–90% draft acceptance, ~2× throughput)
36
+ - **Vision**: Image inputs work via the base model's vision encoder (weights kept at original BF16/FP16 precision)
37
+
38
+ ## Why code calibration?
39
+
40
+ AutoRound's algorithm optimizes weight rounding by minimizing the difference between the quantized model's outputs and the full-precision model's outputs on a set of calibration samples. **The calibration dataset therefore shapes which activations and weight patterns are prioritized during optimization.**
41
+
42
+ Using a **normalized and sampled subset of `nvidia/OpenCodeInstruct`** — a large, execution-verified dataset of coding problems and solutions — means the rounding decisions are tuned for code-style token distributions: identifiers, keywords, indentation patterns, and structured reasoning. In practice this tends to:
43
+
44
+ - Better preserve accuracy on code generation benchmarks relative to a pile-calibrated quant
45
+ - Improve instruction following for programming tasks (function signatures, docstrings, tool calls)
46
+ - Retain structured output quality (JSON, markdown code blocks, structured diffs)
47
+
48
+ If your primary use-case is code generation or an AI coding assistant, this variant is the recommended choice. For general-purpose or multimodal usage, see the standard [`Qwen3.6-27B-int4-AutoRound`](https://huggingface.co/webhie/Qwen3.6-27B-int4-AutoRound) quant.
49
+
50
+ ## Quick inference with vLLM (with MTP speculative decoding)
51
+
52
+ Requires vLLM v0.19.1+ with Qwen3_5 MTP support. Set the following environment variables before starting:
53
+
54
+ ```bash
55
+ export VLLM_USE_FLASHINFER_SAMPLER=1
56
+ export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
57
+ export VLLM_FLOAT32_MATMUL_PRECISION=high
58
+ export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True,max_split_size_mb:512"
59
+ export VLLM_NO_USAGE_STATS=1
60
+ export VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1
61
+ export VLLM_MARLIN_USE_ATOMIC_ADD=1
62
+ export OMP_NUM_THREADS=1
63
+ export CUDA_DEVICE_MAX_CONNECTIONS=8
64
+ export NCCL_CUMEM_ENABLE=0
65
+ export NCCL_P2P_DISABLE=1
66
+ ```
67
+
68
+ ```bash
69
+ vllm serve webhie/Qwen3.6-27B-int4-AutoRound-Code \
70
+ --served-model-name qwen3.6-27b \
71
+ --host 0.0.0.0 --port 11434 \
72
+ --trust-remote-code \
73
+ --dtype auto \
74
+ --quantization auto_round \
75
+ --max-model-len 200704 \
76
+ --gpu-memory-utilization 0.92 \
77
+ --max-num-seqs 4 \
78
+ --kv-cache-dtype fp8_e4m3 \
79
+ --attention-backend flashinfer \
80
+ --performance-mode throughput \
81
+ --max-num-batched-tokens 2048 \
82
+ --enable-chunked-prefill \
83
+ --enable-auto-tool-choice \
84
+ --tool-call-parser qwen3_coder \
85
+ --reasoning-parser qwen3 \
86
+ --default-chat-template-kwargs '{"preserve_thinking":true}' \
87
+ --override-generation-config '{"temperature":0.6,"top_p":0.95,"top_k":20,"min_p":0.0,"presence_penalty":0.0,"repetition_penalty":1.0}' \
88
+ --enable-prompt-tokens-details \
89
+ --speculative-config '{"method":"mtp","num_speculative_tokens":3}'
90
+ ```
91
+
92
+ Remove `--speculative-config` to disable MTP speculative decoding. See the [vllm-blackwell-guide](https://github.com/lastloop-ai/vllm-blackwell-guide) repo for a full Docker Compose setup with all env vars pre-configured.
93
+
94
+ ### OpenAI-compatible request
95
+
96
+ ```python
97
+ from openai import OpenAI
98
+ client = OpenAI(base_url="http://localhost:11434/v1", api_key="EMPTY")
99
+ r = client.chat.completions.create(
100
+ model="qwen3.6-27b",
101
+ messages=[{"role": "user", "content": "Write a quicksort in Python."}],
102
+ max_tokens=512,
103
+ )
104
+ print(r.choices[0].message.content)
105
+ ```
106
+
107
+ ### Transformers (no spec decoding)
108
+
109
+ ```python
110
+ from transformers import AutoModelForCausalLM, AutoTokenizer
111
+ m = AutoModelForCausalLM.from_pretrained(
112
+ "webhie/Qwen3.6-27B-int4-AutoRound-Code",
113
+ trust_remote_code=True,
114
+ device_map="auto",
115
+ )
116
+ tok = AutoTokenizer.from_pretrained("webhie/Qwen3.6-27B-int4-AutoRound-Code")
117
+ msg = [{"role": "user", "content": "Write a binary search in Python."}]
118
+ ids = tok.apply_chat_template(msg, add_generation_prompt=True, return_tensors="pt").to(m.device)
119
+ print(tok.decode(m.generate(ids, max_new_tokens=256)[0]))
120
+ ```
121
+
122
+ ## Quantization details
123
+
124
+ | Field | Value |
125
+ |---|---|
126
+ | Base | `Qwen/Qwen3.6-27B` |
127
+ | Method | AutoRound (`intel/auto-round`), **best recipe** |
128
+ | Scheme | W4A16 (4-bit weights, 16-bit activations; `config.json` sets `dtype: bfloat16`) |
129
+ | Bits | 4 |
130
+ | Group size | 128 |
131
+ | Symmetric | yes |
132
+ | Packing format | `auto_round:auto_gptq` |
133
+ | Unquantized layers | `linear_attn.in_proj_a/b`, all LayerNorms, RMSNorms, router gates |
134
+ | Calibration dataset | Normalized & sampled subset of [`nvidia/OpenCodeInstruct`](https://huggingface.co/datasets/nvidia/OpenCodeInstruct) |
135
+ | Calibration samples | 512 |
136
+ | Iterations | 1000 |
137
+ | torch.compile | enabled |
138
+ | GPU used for quant | 1× RTX 5090 (32 GB, SM120), `low_gpu_mem_usage=True` |
139
+
140
+ ### Unquantized layers — why
141
+
142
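+ - **`linear_attn.in_proj_a/b`**: low-rank projections in Qwen3.6's Gated DeltaNet whose shapes aren't divisible by 32 (a shape constraint AutoRound checks before quantizing a layer; note that the quantization group size used here is 128, not 32), so AutoRound skips them automatically. They make up a tiny fraction of total parameters.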
143
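+ - **Norms, routers**: precision-sensitive and very small — kept at full precision. (Router gates only exist in MoE variants; this 27B model is dense, so in practice this covers the norm layers.)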
144
+
145
+ ## Performance
146
+
147
+ Benchmarked on **1× RTX 5090 (32 GB)** with vLLM + FP8 KV cache + MTP n=3:
148
+
149
+ | Config | Throughput |
150
+ |---|---:|
151
+ | vLLM + MTP n=3 | **~150 tok/s** |
152
+ | vLLM (MTP disabled) | **~70 tok/s** |
153
+
154
+ The ~2× speedup comes from ~85–90% draft acceptance via MTP speculative decoding with `num_speculative_tokens: 3`.
155
+
156
+ ## Reproduction
157
+
158
+ ```bash
159
+ pip install auto-round
160
+
161
+ # The calibration data was first normalized and sampled from nvidia/OpenCodeInstruct
162
+ # (formatting cleaned, deduplicated, balanced across domains) and exported as a
163
+ # local JSON file before quantization. Pass your own prepared subset with:
164
+ # --dataset ./subset_10k.json
165
+
166
+ auto-round-best \
167
+ --model Qwen/Qwen3.6-27B \
+ --dataset ./subset_10k.json \
168
+ --scheme W4A16 \
169
+ --format auto_round \
170
+ --output_dir Qwen3.6-27B-int4-AutoRound-Code \
171
+ --enable_torch_compile \
172
+ --low_gpu_mem_usage \
173
+ --device_map 0
174
+ ```
175
+
176
+ No post-processing needed — MTP and image inputs work out of the box.
177
+
178
+ ## Acknowledgements
179
+
180
+ - [Alibaba / Qwen team](https://huggingface.co/Qwen) for the base [Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B) model
181
+ - [Intel AutoRound](https://github.com/intel/auto-round) team for the quantization framework and the `auto-round-best` recipe
182
+ - [NVIDIA](https://huggingface.co/nvidia) for the [OpenCodeInstruct](https://huggingface.co/datasets/nvidia/OpenCodeInstruct) calibration dataset — ~5 M execution-verified coding samples used to domain-adapt this quant
183
+ - [Lorbus](https://huggingface.co/Lorbus) for the original AutoRound quant of this model that inspired this release
184
+ - [@eugr](https://github.com/eugr) for the [spark-vllm-docker](https://github.com/eugr/spark-vllm-docker) fork and TurboQuant KV cache work
185
+ - [vLLM project](https://github.com/vllm-project/vllm) for the inference engine and Qwen3_5 MTP support
186
+
187
+ ## License
188
+
189
+ Apache 2.0 — same as [Qwen3.6-27B base](https://huggingface.co/Qwen/Qwen3.6-27B).
190
+
191
+ ## Citation
192
+
193
+ If you use this quant, please cite the original Qwen3.6 release (see base model card), the AutoRound paper, and the OpenCodeInstruct dataset:
194
+
195
+ ```bibtex
196
+ @article{cheng2023autoround,
197
+ title = {Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs},
198
+ author = {Cheng, Wenhua and Zhang, Weiwei and Shen, Haihao and Cai, Yiyang and He, Xin and Lv, Kaokao and Liu, Yi},
199
+ journal = {arXiv preprint arXiv:2309.05516},
200
+ year = {2023}
201
+ }
202
+
203
+ @misc{nvidia2025opencode,
204
+ title = {OpenCodeInstruct: A Large-scale Instruction Tuning Dataset for Code LLMs},
205
+ author = {NVIDIA},
206
+ year = {2025},
207
+ url = {https://huggingface.co/datasets/nvidia/OpenCodeInstruct}
208
+ }
209
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {#- render_content: converts one message's `content` into prompt text. A string is emitted unchanged; a non-mapping iterable is rendered item by item (image placeholder, video placeholder, or the item's text); none/undefined renders as an empty string; anything else raises. `do_vision_count` controls whether the image/video counters advance, and `is_system_content` makes image/video items raise instead of rendering. -#}
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {#- An item counts as an image if it has an 'image' or 'image_url' key, or its type is 'image'. -#}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {#- `add_vision_id` is not set anywhere in this template; presumably it is supplied by the caller as a template variable. When truthy, a 'Picture N: ' label precedes the placeholder. -#}
+ {%- if add_vision_id is defined and add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {#- Video items follow the same steps as images, using the video counter and the video placeholder. -#}
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id is defined and add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {#- Missing content (none or undefined) renders as an empty string rather than raising. -#}
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- set ns_flags = namespace(enable_thinking=true, has_tools=false) %}
43
+ {%- if enable_thinking is defined %}
44
+ {%- set ns_flags.enable_thinking = enable_thinking %}
45
+ {%- endif %}
46
+ {%- if not messages %}
47
+ {{- raise_exception('No messages provided.') }}
48
+ {%- endif %}
49
+ {%- set system_content = '' %}
50
+ {%- set has_system = false %}
51
+ {%- if messages[0].role == 'system' or messages[0].role == 'developer' %}
52
+ {%- set has_system = true %}
53
+ {%- set system_content = render_content(messages[0].content, false, true)|trim %}
54
+ {%- if '<|think_off|>' in system_content %}
55
+ {%- set ns_flags.enable_thinking = false %}
56
+ {%- set system_content = system_content | replace('<|think_off|>', '') %}
57
+ {%- endif %}
58
+ {%- if '<|think_on|>' in system_content %}
59
+ {%- set ns_flags.enable_thinking = true %}
60
+ {%- set system_content = system_content | replace('<|think_on|>', '') %}
61
+ {%- endif %}
62
+ {%- set system_content = system_content | trim %}
63
+ {%- endif %}
64
+ {%- if tools and tools is iterable and tools is not mapping %}
65
+ {%- set ns_flags.has_tools = true %}
66
+ {{- '<|im_start|>system\n' }}
67
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
68
+ {%- for tool in tools %}
69
+ {%- set fn = tool.function if tool.function is defined else tool %}
70
+ {%- set props = {} %}
71
+ {%- set req = [] %}
72
+ {%- if fn.parameters is defined and fn.parameters is mapping %}
73
+ {%- if fn.parameters.properties is defined %}
74
+ {%- set props = fn.parameters.properties %}
75
+ {%- endif %}
76
+ {%- if fn.parameters.required is defined %}
77
+ {%- set req = fn.parameters.required %}
78
+ {%- endif %}
79
+ {%- endif %}
80
+ {%- set ns_p = namespace(sig='') %}
81
+ {%- for pname in props %}
82
+ {%- set pdef = props[pname] %}
83
+ {%- set ptype = 'any' %}
84
+ {%- if pdef.type is defined %}
85
+ {%- if pdef.type == 'array' or pdef.type == 'object' %}
86
+ {%- set ptype = 'array|object' %}
87
+ {%- elif pdef.enum is defined and pdef.enum is iterable and pdef.enum is not string and pdef.enum is not mapping %}
88
+ {%- set ptype = pdef.enum | join('|') %}
89
+ {%- else %}
90
+ {%- set ptype = pdef.type %}
91
+ {%- endif %}
92
+ {%- endif %}
93
+ {%- set part = pname ~ ('' if pname in req else '?') ~ ': ' ~ ptype %}
94
+ {%- set ns_p.sig = ns_p.sig ~ ', ' ~ part if ns_p.sig else part %}
95
+ {%- endfor %}
96
+ {{- '\n- ' ~ fn.name ~ '(' ~ ns_p.sig ~ ')' }}
97
+ {%- if fn.description is defined %}
98
+ {{- ' — ' ~ fn.description }}
99
+ {%- endif %}
100
+ {%- if fn.parameters is defined and fn.parameters is mapping %}
101
+ {%- for pname in props %}
102
+ {%- set pdef = props[pname] %}
103
+ {%- if pdef.type is defined and (pdef.type == 'array' or pdef.type == 'object') %}
104
+ {{- '\n - ' ~ pname ~ ' schema: ' ~ pdef | tojson }}
105
+ {%- endif %}
106
+ {%- endfor %}
107
+ {%- endif %}
108
+ {%- endfor %}
109
+ {{- "\n</tools>" }}
110
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>' }}
111
+ {%- if has_system and system_content %}
112
+ {{- '\n\n' + system_content }}
113
+ {%- endif %}
114
+ {{- '<|im_end|>\n' }}
115
+ {%- elif has_system and system_content %}
116
+ {{- '<|im_start|>system\n' + system_content + '<|im_end|>\n' }}
117
+ {%- endif %}
118
+ {%- for message in messages %}
119
+ {%- set is_system = (message.role == "system" or message.role == "developer") %}
120
+ {%- set content = render_content(message.content, true, is_system)|trim %}
121
+ {%- if '<|think_off|>' in content %}
122
+ {%- set ns_flags.enable_thinking = false %}
123
+ {%- set content = content | replace('<|think_off|>', '') | trim %}
124
+ {%- elif '<|think_on|>' in content %}
125
+ {%- set ns_flags.enable_thinking = true %}
126
+ {%- set content = content | replace('<|think_on|>', '') | trim %}
127
+ {%- endif %}
128
+ {%- if is_system %}
129
+ {%- if not loop.first and content %}
130
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
131
+ {%- endif %}
132
+ {%- elif message.role == "user" %}
133
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>\n' }}
134
+ {%- elif message.role == "assistant" %}
135
+ {%- set reasoning_content = '' %}
136
+ {%- if message.reasoning_content is string %}
137
+ {%- set reasoning_content = message.reasoning_content %}
138
+ {%- else %}
139
+ {%- if '</think>' in content or '</thinking>' in content %}
140
+ {%- set think_end_token = '</think>' if '</think>' in content else '</thinking>' %}
141
+ {%- set reasoning_parts = content.split(think_end_token) %}
142
+ {%- set reasoning_content = reasoning_parts[0] %}
143
+ {%- set content = reasoning_parts[1] %}
144
+ {%- if '<think>' in reasoning_content %}
145
+ {%- set reasoning_content = reasoning_content.split('<think>')[1] %}
146
+ {%- endif %}
147
+ {%- elif '<think>' in content %}
148
+ {%- if '<tool_call>' in content %}
149
+ {%- set content = content | replace('<tool_call>', '</think>\n<tool_call>', 1) %}
150
+ {%- set reasoning_parts = content.split('</think>') %}
151
+ {%- set reasoning_content = reasoning_parts[0] %}
152
+ {%- set content = reasoning_parts[1] %}
153
+ {%- if '<think>' in reasoning_content %}
154
+ {%- set reasoning_content = reasoning_content.split('<think>')[1] %}
155
+ {%- endif %}
156
+ {%- else %}
157
+ {%- set reasoning_content = content.split('<think>')[1] %}
158
+ {%- set content = '' %}
159
+ {%- endif %}
160
+ {%- endif %}
161
+ {%- endif %}
162
+ {%- set reasoning_content = reasoning_content | trim %}
163
+ {%- set content = content | trim %}
164
+ {%- set show_think = false %}
165
+ {%- if reasoning_content %}
166
+ {%- if preserve_thinking is defined and preserve_thinking %}
167
+ {%- set show_think = true %}
168
+ {%- elif loop.last %}
169
+ {%- set show_think = true %}
170
+ {%- endif %}
171
+ {%- endif %}
172
+ {%- if show_think %}
173
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
174
+ {%- else %}
175
+ {{- '<|im_start|>' + message.role + '\n' + content }}
176
+ {%- endif %}
177
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
178
+ {%- for tool_call in message.tool_calls %}
179
+ {%- if tool_call.function is defined %}
180
+ {%- set tool_call = tool_call.function %}
181
+ {%- endif %}
182
+ {%- if loop.first and content %}
183
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
184
+ {%- else %}
185
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
186
+ {%- endif %}
187
+ {%- if tool_call.arguments is defined and tool_call.arguments is mapping %}
188
+ {%- if tool_call.arguments|length > 0 %}
189
+ {%- for args_name in tool_call.arguments %}
190
+ {%- set args_value = tool_call.arguments[args_name] %}
191
+ {{- '<parameter=' + args_name + '>\n' }}
192
+ {%- set args_value = args_value | tojson if args_value is mapping or (args_value is iterable and args_value is not string) else args_value | string %}
193
+ {{- args_value }}
194
+ {{- '\n</parameter>\n' }}
195
+ {%- endfor %}
196
+ {%- endif %}
197
+ {%- elif tool_call.arguments is defined and tool_call.arguments is string %}
198
+ {%- if tool_call.arguments|trim|length > 0 %}
199
+ {{- tool_call.arguments }}
200
+ {{- '\n' }}
201
+ {%- endif %}
202
+ {%- endif %}
203
+ {{- '</function>\n</tool_call>' }}
204
+ {%- endfor %}
205
+ {%- endif %}
206
+ {{- '<|im_end|>\n' }}
207
+ {%- elif message.role == "tool" %}
208
+ {#- Open a user turn before the first tool response of a run of consecutive tool messages. `loop.previtem` is undefined on the first loop iteration, so `loop.first` is checked explicitly: a conversation that starts with a tool message still gets its `<|im_start|>user` header, matching the `<|im_end|>` emitted after the run. -#}
+ {%- if loop.first or loop.previtem.role != "tool" %}
209
+ {{- '<|im_start|>user' }}
210
+ {%- endif %}
211
+ {{- '\n<tool_response>\n' }}
212
+ {{- content }}
213
+ {{- '\n</tool_response>' }}
214
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
215
+ {{- '<|im_end|>\n' }}
216
+ {%- elif loop.last %}
217
+ {{- '<|im_end|>\n' }}
218
+ {%- endif %}
219
+ {%- else %}
220
+ {{- raise_exception('Unexpected message role.') }}
221
+ {%- endif %}
222
+ {%- endfor %}
223
+ {%- if add_generation_prompt %}
224
+ {{- '<|im_start|>assistant\n' }}
225
+ {%- if ns_flags.enable_thinking is false %}
226
+ {{- '<think>\n\n</think>\n\n' }}
227
+ {%- else %}
228
+ {{- '<think>\n' }}
229
+ {%- endif %}
230
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5ForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "image_token_id": 248056,
7
+ "language_model_only": false,
8
+ "model_type": "qwen3_5",
9
+ "quantization_config": {
10
+ "autoround_version": "0.13.0",
11
+ "bits": 4,
12
+ "block_name_to_quantize": [
13
+ "model.language_model.layers",
14
+ "mtp.layers"
15
+ ],
16
+ "data_type": "int",
17
+ "extra_config": {
18
+ "model.language_model.layers.0.linear_attn.in_proj_a": {
19
+ "bits": 16,
20
+ "data_type": "fp"
21
+ },
22
+ "model.language_model.layers.0.linear_attn.in_proj_b": {
23
+ "bits": 16,
24
+ "data_type": "fp"
25
+ },
26
+ "model.language_model.layers.1.linear_attn.in_proj_a": {
27
+ "bits": 16,
28
+ "data_type": "fp"
29
+ },
30
+ "model.language_model.layers.1.linear_attn.in_proj_b": {
31
+ "bits": 16,
32
+ "data_type": "fp"
33
+ },
34
+ "model.language_model.layers.10.linear_attn.in_proj_a": {
35
+ "bits": 16,
36
+ "data_type": "fp"
37
+ },
38
+ "model.language_model.layers.10.linear_attn.in_proj_b": {
39
+ "bits": 16,
40
+ "data_type": "fp"
41
+ },
42
+ "model.language_model.layers.12.linear_attn.in_proj_a": {
43
+ "bits": 16,
44
+ "data_type": "fp"
45
+ },
46
+ "model.language_model.layers.12.linear_attn.in_proj_b": {
47
+ "bits": 16,
48
+ "data_type": "fp"
49
+ },
50
+ "model.language_model.layers.13.linear_attn.in_proj_a": {
51
+ "bits": 16,
52
+ "data_type": "fp"
53
+ },
54
+ "model.language_model.layers.13.linear_attn.in_proj_b": {
55
+ "bits": 16,
56
+ "data_type": "fp"
57
+ },
58
+ "model.language_model.layers.14.linear_attn.in_proj_a": {
59
+ "bits": 16,
60
+ "data_type": "fp"
61
+ },
62
+ "model.language_model.layers.14.linear_attn.in_proj_b": {
63
+ "bits": 16,
64
+ "data_type": "fp"
65
+ },
66
+ "model.language_model.layers.16.linear_attn.in_proj_a": {
67
+ "bits": 16,
68
+ "data_type": "fp"
69
+ },
70
+ "model.language_model.layers.16.linear_attn.in_proj_b": {
71
+ "bits": 16,
72
+ "data_type": "fp"
73
+ },
74
+ "model.language_model.layers.17.linear_attn.in_proj_a": {
75
+ "bits": 16,
76
+ "data_type": "fp"
77
+ },
78
+ "model.language_model.layers.17.linear_attn.in_proj_b": {
79
+ "bits": 16,
80
+ "data_type": "fp"
81
+ },
82
+ "model.language_model.layers.18.linear_attn.in_proj_a": {
83
+ "bits": 16,
84
+ "data_type": "fp"
85
+ },
86
+ "model.language_model.layers.18.linear_attn.in_proj_b": {
87
+ "bits": 16,
88
+ "data_type": "fp"
89
+ },
90
+ "model.language_model.layers.2.linear_attn.in_proj_a": {
91
+ "bits": 16,
92
+ "data_type": "fp"
93
+ },
94
+ "model.language_model.layers.2.linear_attn.in_proj_b": {
95
+ "bits": 16,
96
+ "data_type": "fp"
97
+ },
98
+ "model.language_model.layers.20.linear_attn.in_proj_a": {
99
+ "bits": 16,
100
+ "data_type": "fp"
101
+ },
102
+ "model.language_model.layers.20.linear_attn.in_proj_b": {
103
+ "bits": 16,
104
+ "data_type": "fp"
105
+ },
106
+ "model.language_model.layers.21.linear_attn.in_proj_a": {
107
+ "bits": 16,
108
+ "data_type": "fp"
109
+ },
110
+ "model.language_model.layers.21.linear_attn.in_proj_b": {
111
+ "bits": 16,
112
+ "data_type": "fp"
113
+ },
114
+ "model.language_model.layers.22.linear_attn.in_proj_a": {
115
+ "bits": 16,
116
+ "data_type": "fp"
117
+ },
118
+ "model.language_model.layers.22.linear_attn.in_proj_b": {
119
+ "bits": 16,
120
+ "data_type": "fp"
121
+ },
122
+ "model.language_model.layers.24.linear_attn.in_proj_a": {
123
+ "bits": 16,
124
+ "data_type": "fp"
125
+ },
126
+ "model.language_model.layers.24.linear_attn.in_proj_b": {
127
+ "bits": 16,
128
+ "data_type": "fp"
129
+ },
130
+ "model.language_model.layers.25.linear_attn.in_proj_a": {
131
+ "bits": 16,
132
+ "data_type": "fp"
133
+ },
134
+ "model.language_model.layers.25.linear_attn.in_proj_b": {
135
+ "bits": 16,
136
+ "data_type": "fp"
137
+ },
138
+ "model.language_model.layers.26.linear_attn.in_proj_a": {
139
+ "bits": 16,
140
+ "data_type": "fp"
141
+ },
142
+ "model.language_model.layers.26.linear_attn.in_proj_b": {
143
+ "bits": 16,
144
+ "data_type": "fp"
145
+ },
146
+ "model.language_model.layers.28.linear_attn.in_proj_a": {
147
+ "bits": 16,
148
+ "data_type": "fp"
149
+ },
150
+ "model.language_model.layers.28.linear_attn.in_proj_b": {
151
+ "bits": 16,
152
+ "data_type": "fp"
153
+ },
154
+ "model.language_model.layers.29.linear_attn.in_proj_a": {
155
+ "bits": 16,
156
+ "data_type": "fp"
157
+ },
158
+ "model.language_model.layers.29.linear_attn.in_proj_b": {
159
+ "bits": 16,
160
+ "data_type": "fp"
161
+ },
162
+ "model.language_model.layers.30.linear_attn.in_proj_a": {
163
+ "bits": 16,
164
+ "data_type": "fp"
165
+ },
166
+ "model.language_model.layers.30.linear_attn.in_proj_b": {
167
+ "bits": 16,
168
+ "data_type": "fp"
169
+ },
170
+ "model.language_model.layers.32.linear_attn.in_proj_a": {
171
+ "bits": 16,
172
+ "data_type": "fp"
173
+ },
174
+ "model.language_model.layers.32.linear_attn.in_proj_b": {
175
+ "bits": 16,
176
+ "data_type": "fp"
177
+ },
178
+ "model.language_model.layers.33.linear_attn.in_proj_a": {
179
+ "bits": 16,
180
+ "data_type": "fp"
181
+ },
182
+ "model.language_model.layers.33.linear_attn.in_proj_b": {
183
+ "bits": 16,
184
+ "data_type": "fp"
185
+ },
186
+ "model.language_model.layers.34.linear_attn.in_proj_a": {
187
+ "bits": 16,
188
+ "data_type": "fp"
189
+ },
190
+ "model.language_model.layers.34.linear_attn.in_proj_b": {
191
+ "bits": 16,
192
+ "data_type": "fp"
193
+ },
194
+ "model.language_model.layers.36.linear_attn.in_proj_a": {
195
+ "bits": 16,
196
+ "data_type": "fp"
197
+ },
198
+ "model.language_model.layers.36.linear_attn.in_proj_b": {
199
+ "bits": 16,
200
+ "data_type": "fp"
201
+ },
202
+ "model.language_model.layers.37.linear_attn.in_proj_a": {
203
+ "bits": 16,
204
+ "data_type": "fp"
205
+ },
206
+ "model.language_model.layers.37.linear_attn.in_proj_b": {
207
+ "bits": 16,
208
+ "data_type": "fp"
209
+ },
210
+ "model.language_model.layers.38.linear_attn.in_proj_a": {
211
+ "bits": 16,
212
+ "data_type": "fp"
213
+ },
214
+ "model.language_model.layers.38.linear_attn.in_proj_b": {
215
+ "bits": 16,
216
+ "data_type": "fp"
217
+ },
218
+ "model.language_model.layers.4.linear_attn.in_proj_a": {
219
+ "bits": 16,
220
+ "data_type": "fp"
221
+ },
222
+ "model.language_model.layers.4.linear_attn.in_proj_b": {
223
+ "bits": 16,
224
+ "data_type": "fp"
225
+ },
226
+ "model.language_model.layers.40.linear_attn.in_proj_a": {
227
+ "bits": 16,
228
+ "data_type": "fp"
229
+ },
230
+ "model.language_model.layers.40.linear_attn.in_proj_b": {
231
+ "bits": 16,
232
+ "data_type": "fp"
233
+ },
234
+ "model.language_model.layers.41.linear_attn.in_proj_a": {
235
+ "bits": 16,
236
+ "data_type": "fp"
237
+ },
238
+ "model.language_model.layers.41.linear_attn.in_proj_b": {
239
+ "bits": 16,
240
+ "data_type": "fp"
241
+ },
242
+ "model.language_model.layers.42.linear_attn.in_proj_a": {
243
+ "bits": 16,
244
+ "data_type": "fp"
245
+ },
246
+ "model.language_model.layers.42.linear_attn.in_proj_b": {
247
+ "bits": 16,
248
+ "data_type": "fp"
249
+ },
250
+ "model.language_model.layers.44.linear_attn.in_proj_a": {
251
+ "bits": 16,
252
+ "data_type": "fp"
253
+ },
254
+ "model.language_model.layers.44.linear_attn.in_proj_b": {
255
+ "bits": 16,
256
+ "data_type": "fp"
257
+ },
258
+ "model.language_model.layers.45.linear_attn.in_proj_a": {
259
+ "bits": 16,
260
+ "data_type": "fp"
261
+ },
262
+ "model.language_model.layers.45.linear_attn.in_proj_b": {
263
+ "bits": 16,
264
+ "data_type": "fp"
265
+ },
266
+ "model.language_model.layers.46.linear_attn.in_proj_a": {
267
+ "bits": 16,
268
+ "data_type": "fp"
269
+ },
270
+ "model.language_model.layers.46.linear_attn.in_proj_b": {
271
+ "bits": 16,
272
+ "data_type": "fp"
273
+ },
274
+ "model.language_model.layers.48.linear_attn.in_proj_a": {
275
+ "bits": 16,
276
+ "data_type": "fp"
277
+ },
278
+ "model.language_model.layers.48.linear_attn.in_proj_b": {
279
+ "bits": 16,
280
+ "data_type": "fp"
281
+ },
282
+ "model.language_model.layers.49.linear_attn.in_proj_a": {
283
+ "bits": 16,
284
+ "data_type": "fp"
285
+ },
286
+ "model.language_model.layers.49.linear_attn.in_proj_b": {
287
+ "bits": 16,
288
+ "data_type": "fp"
289
+ },
290
+ "model.language_model.layers.5.linear_attn.in_proj_a": {
291
+ "bits": 16,
292
+ "data_type": "fp"
293
+ },
294
+ "model.language_model.layers.5.linear_attn.in_proj_b": {
295
+ "bits": 16,
296
+ "data_type": "fp"
297
+ },
298
+ "model.language_model.layers.50.linear_attn.in_proj_a": {
299
+ "bits": 16,
300
+ "data_type": "fp"
301
+ },
302
+ "model.language_model.layers.50.linear_attn.in_proj_b": {
303
+ "bits": 16,
304
+ "data_type": "fp"
305
+ },
306
+ "model.language_model.layers.52.linear_attn.in_proj_a": {
307
+ "bits": 16,
308
+ "data_type": "fp"
309
+ },
310
+ "model.language_model.layers.52.linear_attn.in_proj_b": {
311
+ "bits": 16,
312
+ "data_type": "fp"
313
+ },
314
+ "model.language_model.layers.53.linear_attn.in_proj_a": {
315
+ "bits": 16,
316
+ "data_type": "fp"
317
+ },
318
+ "model.language_model.layers.53.linear_attn.in_proj_b": {
319
+ "bits": 16,
320
+ "data_type": "fp"
321
+ },
322
+ "model.language_model.layers.54.linear_attn.in_proj_a": {
323
+ "bits": 16,
324
+ "data_type": "fp"
325
+ },
326
+ "model.language_model.layers.54.linear_attn.in_proj_b": {
327
+ "bits": 16,
328
+ "data_type": "fp"
329
+ },
330
+ "model.language_model.layers.56.linear_attn.in_proj_a": {
331
+ "bits": 16,
332
+ "data_type": "fp"
333
+ },
334
+ "model.language_model.layers.56.linear_attn.in_proj_b": {
335
+ "bits": 16,
336
+ "data_type": "fp"
337
+ },
338
+ "model.language_model.layers.57.linear_attn.in_proj_a": {
339
+ "bits": 16,
340
+ "data_type": "fp"
341
+ },
342
+ "model.language_model.layers.57.linear_attn.in_proj_b": {
343
+ "bits": 16,
344
+ "data_type": "fp"
345
+ },
346
+ "model.language_model.layers.58.linear_attn.in_proj_a": {
347
+ "bits": 16,
348
+ "data_type": "fp"
349
+ },
350
+ "model.language_model.layers.58.linear_attn.in_proj_b": {
351
+ "bits": 16,
352
+ "data_type": "fp"
353
+ },
354
+ "model.language_model.layers.6.linear_attn.in_proj_a": {
355
+ "bits": 16,
356
+ "data_type": "fp"
357
+ },
358
+ "model.language_model.layers.6.linear_attn.in_proj_b": {
359
+ "bits": 16,
360
+ "data_type": "fp"
361
+ },
362
+ "model.language_model.layers.60.linear_attn.in_proj_a": {
363
+ "bits": 16,
364
+ "data_type": "fp"
365
+ },
366
+ "model.language_model.layers.60.linear_attn.in_proj_b": {
367
+ "bits": 16,
368
+ "data_type": "fp"
369
+ },
370
+ "model.language_model.layers.61.linear_attn.in_proj_a": {
371
+ "bits": 16,
372
+ "data_type": "fp"
373
+ },
374
+ "model.language_model.layers.61.linear_attn.in_proj_b": {
375
+ "bits": 16,
376
+ "data_type": "fp"
377
+ },
378
+ "model.language_model.layers.62.linear_attn.in_proj_a": {
379
+ "bits": 16,
380
+ "data_type": "fp"
381
+ },
382
+ "model.language_model.layers.62.linear_attn.in_proj_b": {
383
+ "bits": 16,
384
+ "data_type": "fp"
385
+ },
386
+ "model.language_model.layers.8.linear_attn.in_proj_a": {
387
+ "bits": 16,
388
+ "data_type": "fp"
389
+ },
390
+ "model.language_model.layers.8.linear_attn.in_proj_b": {
391
+ "bits": 16,
392
+ "data_type": "fp"
393
+ },
394
+ "model.language_model.layers.9.linear_attn.in_proj_a": {
395
+ "bits": 16,
396
+ "data_type": "fp"
397
+ },
398
+ "model.language_model.layers.9.linear_attn.in_proj_b": {
399
+ "bits": 16,
400
+ "data_type": "fp"
401
+ },
402
+ "mtp.fc": {
403
+ "bits": 16,
404
+ "data_type": "fp"
405
+ }
406
+ },
407
+ "group_size": 128,
408
+ "iters": 1000,
409
+ "low_gpu_mem_usage": true,
410
+ "nsamples": 512,
411
+ "packing_format": "auto_round:auto_gptq",
412
+ "quant_method": "auto-round",
413
+ "sym": true
414
+ },
415
+ "text_config": {
416
+ "attention_bias": false,
417
+ "attention_dropout": 0.0,
418
+ "attn_output_gate": true,
419
+ "bos_token_id": 248044,
420
+ "dtype": "bfloat16",
421
+ "eos_token_id": 248044,
422
+ "full_attention_interval": 4,
423
+ "head_dim": 256,
424
+ "hidden_act": "silu",
425
+ "hidden_size": 5120,
426
+ "initializer_range": 0.02,
427
+ "intermediate_size": 17408,
428
+ "layer_types": [
429
+ "linear_attention",
430
+ "linear_attention",
431
+ "linear_attention",
432
+ "full_attention",
433
+ "linear_attention",
434
+ "linear_attention",
435
+ "linear_attention",
436
+ "full_attention",
437
+ "linear_attention",
438
+ "linear_attention",
439
+ "linear_attention",
440
+ "full_attention",
441
+ "linear_attention",
442
+ "linear_attention",
443
+ "linear_attention",
444
+ "full_attention",
445
+ "linear_attention",
446
+ "linear_attention",
447
+ "linear_attention",
448
+ "full_attention",
449
+ "linear_attention",
450
+ "linear_attention",
451
+ "linear_attention",
452
+ "full_attention",
453
+ "linear_attention",
454
+ "linear_attention",
455
+ "linear_attention",
456
+ "full_attention",
457
+ "linear_attention",
458
+ "linear_attention",
459
+ "linear_attention",
460
+ "full_attention",
461
+ "linear_attention",
462
+ "linear_attention",
463
+ "linear_attention",
464
+ "full_attention",
465
+ "linear_attention",
466
+ "linear_attention",
467
+ "linear_attention",
468
+ "full_attention",
469
+ "linear_attention",
470
+ "linear_attention",
471
+ "linear_attention",
472
+ "full_attention",
473
+ "linear_attention",
474
+ "linear_attention",
475
+ "linear_attention",
476
+ "full_attention",
477
+ "linear_attention",
478
+ "linear_attention",
479
+ "linear_attention",
480
+ "full_attention",
481
+ "linear_attention",
482
+ "linear_attention",
483
+ "linear_attention",
484
+ "full_attention",
485
+ "linear_attention",
486
+ "linear_attention",
487
+ "linear_attention",
488
+ "full_attention",
489
+ "linear_attention",
490
+ "linear_attention",
491
+ "linear_attention",
492
+ "full_attention"
493
+ ],
494
+ "linear_conv_kernel_dim": 4,
495
+ "linear_key_head_dim": 128,
496
+ "linear_num_key_heads": 16,
497
+ "linear_num_value_heads": 48,
498
+ "linear_value_head_dim": 128,
499
+ "mamba_ssm_dtype": "float32",
500
+ "max_position_embeddings": 262144,
501
+ "model_type": "qwen3_5_text",
502
+ "mtp_num_hidden_layers": 1,
503
+ "mtp_use_dedicated_embeddings": false,
504
+ "num_attention_heads": 24,
505
+ "num_hidden_layers": 64,
506
+ "num_key_value_heads": 4,
507
+ "output_gate_type": "swish",
508
+ "pad_token_id": null,
509
+ "partial_rotary_factor": 0.25,
510
+ "rms_norm_eps": 1e-06,
511
+ "rope_parameters": {
512
+ "mrope_interleaved": true,
513
+ "mrope_section": [
514
+ 11,
515
+ 11,
516
+ 10
517
+ ],
518
+ "partial_rotary_factor": 0.25,
519
+ "rope_theta": 10000000,
520
+ "rope_type": "default"
521
+ },
522
+ "tie_word_embeddings": false,
523
+ "use_cache": true,
524
+ "vocab_size": 248320
525
+ },
526
+ "tie_word_embeddings": false,
527
+ "transformers_version": "5.8.0",
528
+ "video_token_id": 248057,
529
+ "vision_config": {
530
+ "deepstack_visual_indexes": [],
531
+ "depth": 27,
532
+ "dtype": "bfloat16",
533
+ "hidden_act": "gelu_pytorch_tanh",
534
+ "hidden_size": 1152,
535
+ "in_channels": 3,
536
+ "initializer_range": 0.02,
537
+ "intermediate_size": 4304,
538
+ "model_type": "qwen3_5_vision",
539
+ "num_heads": 16,
540
+ "num_position_embeddings": 2304,
541
+ "out_hidden_size": 5120,
542
+ "patch_size": 16,
543
+ "spatial_merge_size": 2,
544
+ "temporal_patch_size": 2
545
+ },
546
+ "vision_end_token_id": 248054,
547
+ "vision_start_token_id": 248053
548
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248044,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 1.0,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.8.0"
13
+ }
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5f31382db1faf18a5bc32a8892c6b5143a5a0f2bb1e552f8844f7ab5f3c947
3
+ size 3216489680
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5542d5c232efb98d64e3fc4e9210111d32e2b4a23272a2d7c02e18004f07ab60
3
+ size 3190151576
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211ce8a9b7899221ccd414a15a1e2bdc550b118d9938302108e4f8579f423d94
3
+ size 3219130488
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ea3aecda21080c0486d0a26c31b34caf83b154cf97652d7c33752f96372712
3
+ size 3216860120
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517ead0081fdcb0113a6a881dfabadaa1159874298c06f22c802889d67e0613f
3
+ size 770164464
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e9b80896068ddd0a9603b93e5fa0c34673131cc25123f980ad34512e0399a2a
3
+ size 2542807272
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abed7b0a601b5837427b299e44faad8ad0ec64bdce16da2147736eeff4bb86a9
3
+ size 2542796896
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
model_extra_tensors.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1981cfd8decc58e5f3e7e067c65983e8753fc1587110a420d14ba7b87ecca557
3
+ size 298305576
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "merge_size": 2,
18
+ "patch_size": 16,
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "longest_edge": 16777216,
23
+ "shortest_edge": 65536
24
+ },
25
+ "temporal_patch_size": 2
26
+ }
processor_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "do_convert_rgb": true,
4
+ "do_normalize": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Qwen2VLImageProcessor",
13
+ "image_std": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "merge_size": 2,
19
+ "patch_size": 16,
20
+ "resample": 3,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "longest_edge": 16777216,
24
+ "shortest_edge": 65536
25
+ },
26
+ "temporal_patch_size": 2
27
+ },
28
+ "processor_class": "Qwen3VLProcessor",
29
+ "video_processor": {
30
+ "do_convert_rgb": true,
31
+ "do_normalize": true,
32
+ "do_rescale": true,
33
+ "do_resize": true,
34
+ "do_sample_frames": true,
35
+ "fps": 2,
36
+ "image_mean": [
37
+ 0.5,
38
+ 0.5,
39
+ 0.5
40
+ ],
41
+ "image_std": [
42
+ 0.5,
43
+ 0.5,
44
+ 0.5
45
+ ],
46
+ "max_frames": 768,
47
+ "merge_size": 2,
48
+ "min_frames": 4,
49
+ "patch_size": 16,
50
+ "resample": 3,
51
+ "rescale_factor": 0.00392156862745098,
52
+ "return_metadata": false,
53
+ "size": {
54
+ "longest_edge": 25165824,
55
+ "shortest_edge": 4096
56
+ },
57
+ "temporal_patch_size": 2,
58
+ "video_processor_type": "Qwen3VLVideoProcessor"
59
+ }
60
+ }
quantization_config.json ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "data_type": "int",
4
+ "group_size": 128,
5
+ "sym": true,
6
+ "iters": 1000,
7
+ "low_gpu_mem_usage": true,
8
+ "nsamples": 512,
9
+ "autoround_version": "0.13.0",
10
+ "block_name_to_quantize": "model.language_model.layers",
11
+ "quant_method": "auto-round",
12
+ "packing_format": "auto_round:auto_gptq",
13
+ "extra_config": {
14
+ "model.language_model.layers.0.linear_attn.in_proj_b": {
15
+ "bits": 16,
16
+ "data_type": "fp"
17
+ },
18
+ "model.language_model.layers.0.linear_attn.in_proj_a": {
19
+ "bits": 16,
20
+ "data_type": "fp"
21
+ },
22
+ "model.language_model.layers.1.linear_attn.in_proj_b": {
23
+ "bits": 16,
24
+ "data_type": "fp"
25
+ },
26
+ "model.language_model.layers.1.linear_attn.in_proj_a": {
27
+ "bits": 16,
28
+ "data_type": "fp"
29
+ },
30
+ "model.language_model.layers.2.linear_attn.in_proj_b": {
31
+ "bits": 16,
32
+ "data_type": "fp"
33
+ },
34
+ "model.language_model.layers.2.linear_attn.in_proj_a": {
35
+ "bits": 16,
36
+ "data_type": "fp"
37
+ },
38
+ "model.language_model.layers.4.linear_attn.in_proj_b": {
39
+ "bits": 16,
40
+ "data_type": "fp"
41
+ },
42
+ "model.language_model.layers.4.linear_attn.in_proj_a": {
43
+ "bits": 16,
44
+ "data_type": "fp"
45
+ },
46
+ "model.language_model.layers.5.linear_attn.in_proj_b": {
47
+ "bits": 16,
48
+ "data_type": "fp"
49
+ },
50
+ "model.language_model.layers.5.linear_attn.in_proj_a": {
51
+ "bits": 16,
52
+ "data_type": "fp"
53
+ },
54
+ "model.language_model.layers.6.linear_attn.in_proj_b": {
55
+ "bits": 16,
56
+ "data_type": "fp"
57
+ },
58
+ "model.language_model.layers.6.linear_attn.in_proj_a": {
59
+ "bits": 16,
60
+ "data_type": "fp"
61
+ },
62
+ "model.language_model.layers.8.linear_attn.in_proj_b": {
63
+ "bits": 16,
64
+ "data_type": "fp"
65
+ },
66
+ "model.language_model.layers.8.linear_attn.in_proj_a": {
67
+ "bits": 16,
68
+ "data_type": "fp"
69
+ },
70
+ "model.language_model.layers.9.linear_attn.in_proj_b": {
71
+ "bits": 16,
72
+ "data_type": "fp"
73
+ },
74
+ "model.language_model.layers.9.linear_attn.in_proj_a": {
75
+ "bits": 16,
76
+ "data_type": "fp"
77
+ },
78
+ "model.language_model.layers.10.linear_attn.in_proj_b": {
79
+ "bits": 16,
80
+ "data_type": "fp"
81
+ },
82
+ "model.language_model.layers.10.linear_attn.in_proj_a": {
83
+ "bits": 16,
84
+ "data_type": "fp"
85
+ },
86
+ "model.language_model.layers.12.linear_attn.in_proj_b": {
87
+ "bits": 16,
88
+ "data_type": "fp"
89
+ },
90
+ "model.language_model.layers.12.linear_attn.in_proj_a": {
91
+ "bits": 16,
92
+ "data_type": "fp"
93
+ },
94
+ "model.language_model.layers.13.linear_attn.in_proj_b": {
95
+ "bits": 16,
96
+ "data_type": "fp"
97
+ },
98
+ "model.language_model.layers.13.linear_attn.in_proj_a": {
99
+ "bits": 16,
100
+ "data_type": "fp"
101
+ },
102
+ "model.language_model.layers.14.linear_attn.in_proj_b": {
103
+ "bits": 16,
104
+ "data_type": "fp"
105
+ },
106
+ "model.language_model.layers.14.linear_attn.in_proj_a": {
107
+ "bits": 16,
108
+ "data_type": "fp"
109
+ },
110
+ "model.language_model.layers.16.linear_attn.in_proj_b": {
111
+ "bits": 16,
112
+ "data_type": "fp"
113
+ },
114
+ "model.language_model.layers.16.linear_attn.in_proj_a": {
115
+ "bits": 16,
116
+ "data_type": "fp"
117
+ },
118
+ "model.language_model.layers.17.linear_attn.in_proj_b": {
119
+ "bits": 16,
120
+ "data_type": "fp"
121
+ },
122
+ "model.language_model.layers.17.linear_attn.in_proj_a": {
123
+ "bits": 16,
124
+ "data_type": "fp"
125
+ },
126
+ "model.language_model.layers.18.linear_attn.in_proj_b": {
127
+ "bits": 16,
128
+ "data_type": "fp"
129
+ },
130
+ "model.language_model.layers.18.linear_attn.in_proj_a": {
131
+ "bits": 16,
132
+ "data_type": "fp"
133
+ },
134
+ "model.language_model.layers.20.linear_attn.in_proj_b": {
135
+ "bits": 16,
136
+ "data_type": "fp"
137
+ },
138
+ "model.language_model.layers.20.linear_attn.in_proj_a": {
139
+ "bits": 16,
140
+ "data_type": "fp"
141
+ },
142
+ "model.language_model.layers.21.linear_attn.in_proj_b": {
143
+ "bits": 16,
144
+ "data_type": "fp"
145
+ },
146
+ "model.language_model.layers.21.linear_attn.in_proj_a": {
147
+ "bits": 16,
148
+ "data_type": "fp"
149
+ },
150
+ "model.language_model.layers.22.linear_attn.in_proj_b": {
151
+ "bits": 16,
152
+ "data_type": "fp"
153
+ },
154
+ "model.language_model.layers.22.linear_attn.in_proj_a": {
155
+ "bits": 16,
156
+ "data_type": "fp"
157
+ },
158
+ "model.language_model.layers.24.linear_attn.in_proj_b": {
159
+ "bits": 16,
160
+ "data_type": "fp"
161
+ },
162
+ "model.language_model.layers.24.linear_attn.in_proj_a": {
163
+ "bits": 16,
164
+ "data_type": "fp"
165
+ },
166
+ "model.language_model.layers.25.linear_attn.in_proj_b": {
167
+ "bits": 16,
168
+ "data_type": "fp"
169
+ },
170
+ "model.language_model.layers.25.linear_attn.in_proj_a": {
171
+ "bits": 16,
172
+ "data_type": "fp"
173
+ },
174
+ "model.language_model.layers.26.linear_attn.in_proj_b": {
175
+ "bits": 16,
176
+ "data_type": "fp"
177
+ },
178
+ "model.language_model.layers.26.linear_attn.in_proj_a": {
179
+ "bits": 16,
180
+ "data_type": "fp"
181
+ },
182
+ "model.language_model.layers.28.linear_attn.in_proj_b": {
183
+ "bits": 16,
184
+ "data_type": "fp"
185
+ },
186
+ "model.language_model.layers.28.linear_attn.in_proj_a": {
187
+ "bits": 16,
188
+ "data_type": "fp"
189
+ },
190
+ "model.language_model.layers.29.linear_attn.in_proj_b": {
191
+ "bits": 16,
192
+ "data_type": "fp"
193
+ },
194
+ "model.language_model.layers.29.linear_attn.in_proj_a": {
195
+ "bits": 16,
196
+ "data_type": "fp"
197
+ },
198
+ "model.language_model.layers.30.linear_attn.in_proj_b": {
199
+ "bits": 16,
200
+ "data_type": "fp"
201
+ },
202
+ "model.language_model.layers.30.linear_attn.in_proj_a": {
203
+ "bits": 16,
204
+ "data_type": "fp"
205
+ },
206
+ "model.language_model.layers.32.linear_attn.in_proj_b": {
207
+ "bits": 16,
208
+ "data_type": "fp"
209
+ },
210
+ "model.language_model.layers.32.linear_attn.in_proj_a": {
211
+ "bits": 16,
212
+ "data_type": "fp"
213
+ },
214
+ "model.language_model.layers.33.linear_attn.in_proj_b": {
215
+ "bits": 16,
216
+ "data_type": "fp"
217
+ },
218
+ "model.language_model.layers.33.linear_attn.in_proj_a": {
219
+ "bits": 16,
220
+ "data_type": "fp"
221
+ },
222
+ "model.language_model.layers.34.linear_attn.in_proj_b": {
223
+ "bits": 16,
224
+ "data_type": "fp"
225
+ },
226
+ "model.language_model.layers.34.linear_attn.in_proj_a": {
227
+ "bits": 16,
228
+ "data_type": "fp"
229
+ },
230
+ "model.language_model.layers.36.linear_attn.in_proj_b": {
231
+ "bits": 16,
232
+ "data_type": "fp"
233
+ },
234
+ "model.language_model.layers.36.linear_attn.in_proj_a": {
235
+ "bits": 16,
236
+ "data_type": "fp"
237
+ },
238
+ "model.language_model.layers.37.linear_attn.in_proj_b": {
239
+ "bits": 16,
240
+ "data_type": "fp"
241
+ },
242
+ "model.language_model.layers.37.linear_attn.in_proj_a": {
243
+ "bits": 16,
244
+ "data_type": "fp"
245
+ },
246
+ "model.language_model.layers.38.linear_attn.in_proj_b": {
247
+ "bits": 16,
248
+ "data_type": "fp"
249
+ },
250
+ "model.language_model.layers.38.linear_attn.in_proj_a": {
251
+ "bits": 16,
252
+ "data_type": "fp"
253
+ },
254
+ "model.language_model.layers.40.linear_attn.in_proj_b": {
255
+ "bits": 16,
256
+ "data_type": "fp"
257
+ },
258
+ "model.language_model.layers.40.linear_attn.in_proj_a": {
259
+ "bits": 16,
260
+ "data_type": "fp"
261
+ },
262
+ "model.language_model.layers.41.linear_attn.in_proj_b": {
263
+ "bits": 16,
264
+ "data_type": "fp"
265
+ },
266
+ "model.language_model.layers.41.linear_attn.in_proj_a": {
267
+ "bits": 16,
268
+ "data_type": "fp"
269
+ },
270
+ "model.language_model.layers.42.linear_attn.in_proj_b": {
271
+ "bits": 16,
272
+ "data_type": "fp"
273
+ },
274
+ "model.language_model.layers.42.linear_attn.in_proj_a": {
275
+ "bits": 16,
276
+ "data_type": "fp"
277
+ },
278
+ "model.language_model.layers.44.linear_attn.in_proj_b": {
279
+ "bits": 16,
280
+ "data_type": "fp"
281
+ },
282
+ "model.language_model.layers.44.linear_attn.in_proj_a": {
283
+ "bits": 16,
284
+ "data_type": "fp"
285
+ },
286
+ "model.language_model.layers.45.linear_attn.in_proj_b": {
287
+ "bits": 16,
288
+ "data_type": "fp"
289
+ },
290
+ "model.language_model.layers.45.linear_attn.in_proj_a": {
291
+ "bits": 16,
292
+ "data_type": "fp"
293
+ },
294
+ "model.language_model.layers.46.linear_attn.in_proj_b": {
295
+ "bits": 16,
296
+ "data_type": "fp"
297
+ },
298
+ "model.language_model.layers.46.linear_attn.in_proj_a": {
299
+ "bits": 16,
300
+ "data_type": "fp"
301
+ },
302
+ "model.language_model.layers.48.linear_attn.in_proj_b": {
303
+ "bits": 16,
304
+ "data_type": "fp"
305
+ },
306
+ "model.language_model.layers.48.linear_attn.in_proj_a": {
307
+ "bits": 16,
308
+ "data_type": "fp"
309
+ },
310
+ "model.language_model.layers.49.linear_attn.in_proj_b": {
311
+ "bits": 16,
312
+ "data_type": "fp"
313
+ },
314
+ "model.language_model.layers.49.linear_attn.in_proj_a": {
315
+ "bits": 16,
316
+ "data_type": "fp"
317
+ },
318
+ "model.language_model.layers.50.linear_attn.in_proj_b": {
319
+ "bits": 16,
320
+ "data_type": "fp"
321
+ },
322
+ "model.language_model.layers.50.linear_attn.in_proj_a": {
323
+ "bits": 16,
324
+ "data_type": "fp"
325
+ },
326
+ "model.language_model.layers.52.linear_attn.in_proj_b": {
327
+ "bits": 16,
328
+ "data_type": "fp"
329
+ },
330
+ "model.language_model.layers.52.linear_attn.in_proj_a": {
331
+ "bits": 16,
332
+ "data_type": "fp"
333
+ },
334
+ "model.language_model.layers.53.linear_attn.in_proj_b": {
335
+ "bits": 16,
336
+ "data_type": "fp"
337
+ },
338
+ "model.language_model.layers.53.linear_attn.in_proj_a": {
339
+ "bits": 16,
340
+ "data_type": "fp"
341
+ },
342
+ "model.language_model.layers.54.linear_attn.in_proj_b": {
343
+ "bits": 16,
344
+ "data_type": "fp"
345
+ },
346
+ "model.language_model.layers.54.linear_attn.in_proj_a": {
347
+ "bits": 16,
348
+ "data_type": "fp"
349
+ },
350
+ "model.language_model.layers.56.linear_attn.in_proj_b": {
351
+ "bits": 16,
352
+ "data_type": "fp"
353
+ },
354
+ "model.language_model.layers.56.linear_attn.in_proj_a": {
355
+ "bits": 16,
356
+ "data_type": "fp"
357
+ },
358
+ "model.language_model.layers.57.linear_attn.in_proj_b": {
359
+ "bits": 16,
360
+ "data_type": "fp"
361
+ },
362
+ "model.language_model.layers.57.linear_attn.in_proj_a": {
363
+ "bits": 16,
364
+ "data_type": "fp"
365
+ },
366
+ "model.language_model.layers.58.linear_attn.in_proj_b": {
367
+ "bits": 16,
368
+ "data_type": "fp"
369
+ },
370
+ "model.language_model.layers.58.linear_attn.in_proj_a": {
371
+ "bits": 16,
372
+ "data_type": "fp"
373
+ },
374
+ "model.language_model.layers.60.linear_attn.in_proj_b": {
375
+ "bits": 16,
376
+ "data_type": "fp"
377
+ },
378
+ "model.language_model.layers.60.linear_attn.in_proj_a": {
379
+ "bits": 16,
380
+ "data_type": "fp"
381
+ },
382
+ "model.language_model.layers.61.linear_attn.in_proj_b": {
383
+ "bits": 16,
384
+ "data_type": "fp"
385
+ },
386
+ "model.language_model.layers.61.linear_attn.in_proj_a": {
387
+ "bits": 16,
388
+ "data_type": "fp"
389
+ },
390
+ "model.language_model.layers.62.linear_attn.in_proj_b": {
391
+ "bits": 16,
392
+ "data_type": "fp"
393
+ },
394
+ "model.language_model.layers.62.linear_attn.in_proj_a": {
395
+ "bits": 16,
396
+ "data_type": "fp"
397
+ }
398
+ }
399
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b9509352d2af50381ab2247e083b80d32d5c0aba91c272ca9ff729b6a0e523
3
+ size 19989325
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": false,
13
+ "local_files_only": false,
14
+ "model_max_length": 262144,
15
+ "model_specific_special_tokens": {
16
+ "audio_bos_token": "<|audio_start|>",
17
+ "audio_eos_token": "<|audio_end|>",
18
+ "audio_token": "<|audio_pad|>",
19
+ "image_token": "<|image_pad|>",
20
+ "video_token": "<|video_pad|>",
21
+ "vision_bos_token": "<|vision_start|>",
22
+ "vision_eos_token": "<|vision_end|>"
23
+ },
24
+ "pad_token": "<|endoftext|>",
25
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
+ "processor_class": "Qwen3VLProcessor",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "unk_token": null,
30
+ "video_token": "<|video_pad|>",
31
+ "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>"
33
+ }