cpatonn committed on
Commit
fa29333
·
verified ·
1 Parent(s): fd002a9

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -6,56 +6,6 @@ pipeline_tag: text-generation
6
  base_model: Qwen/Qwen3-Coder-Next
7
  ---
8
 
9
- # Qwen3-Coder-Next AWQ - INT4
10
-
11
- ## Model Details
12
-
13
- ### Quantization Details
14
-
15
- - **Quantization Method:** cyankiwi AWQ v1.0
16
- - **Bits:** 4
17
- - **Group Size:** 32
18
- - **Calibration Dataset:** [nvidia/Nemotron-SWE-v1](https://huggingface.co/datasets/nvidia/Nemotron-SWE-v1)
19
- - **Quantization Tool:** [llm-compressor](https://github.com/vllm-project/llm-compressor)
20
-
21
- ### Memory Usage
22
-
23
- | **Type** | **Qwen3-Coder-Next** | **Qwen3-Coder-Next-AWQ-4bit** |
24
- |:---------------:|:----------------:|:----------------:|
25
- | **Memory Size** | 151.5 GB | 45.9 GB |
26
-
27
- ## Inference
28
-
29
- ### Prerequisite
30
-
31
- ```bash
32
- pip install vllm
33
- ```
34
-
35
- ### Basic Usage
36
-
37
- ```bash
38
- VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve cyankiwi/Qwen3-Coder-Next-AWQ-4bit \
39
- --port 8000 \
40
- --tensor-parallel-size 4 \
41
- --max-model-len 262144
42
- ```
43
-
44
- ## Additional Information
45
-
46
- ### Known Issues
47
-
48
- - No MTP implementation
49
-
50
- ### Changelog
51
-
52
- - **v0.9.0** - Initial quantized release
53
-
54
- ### Authors
55
-
56
- - **Name:** Ton Cao
57
- - **Contacts:** ton@cyan.kiwi
58
-
59
  # Qwen3-Coder-Next
60
 
61
  ## Highlights
@@ -256,4 +206,4 @@ If you find our work helpful, feel free to give us a cite.
256
  url = {https://github.com/QwenLM/Qwen3-Coder/blob/main/qwen3_coder_next_tech_report.pdf},
257
  note = {Accessed: 2026-02-03}
258
  }
259
- ```
 
6
  base_model: Qwen/Qwen3-Coder-Next
7
  ---
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Qwen3-Coder-Next
10
 
11
  ## Highlights
 
206
  url = {https://github.com/QwenLM/Qwen3-Coder/blob/main/qwen3_coder_next_tech_report.pdf},
207
  note = {Accessed: 2026-02-03}
208
  }
209
+ ```
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
config.json CHANGED
@@ -98,9 +98,11 @@
98
  "num_bits": 4,
99
  "observer": "mse",
100
  "observer_kwargs": {},
 
101
  "strategy": "group",
102
  "symmetric": true,
103
- "type": "int"
 
104
  }
105
  }
106
  },
@@ -111,7 +113,6 @@
111
  "model.layers.0.linear_attn.in_proj_ba",
112
  "model.layers.0.linear_attn.out_proj",
113
  "model.layers.0.mlp.gate",
114
- "model.layers.0.mlp.shared_expert.gate_up_proj",
115
  "model.layers.0.mlp.shared_expert.gate_proj",
116
  "model.layers.0.mlp.shared_expert.up_proj",
117
  "model.layers.0.mlp.shared_expert.down_proj",
@@ -120,7 +121,6 @@
120
  "model.layers.1.linear_attn.in_proj_ba",
121
  "model.layers.1.linear_attn.out_proj",
122
  "model.layers.1.mlp.gate",
123
- "model.layers.1.mlp.shared_expert.gate_up_proj",
124
  "model.layers.1.mlp.shared_expert.gate_proj",
125
  "model.layers.1.mlp.shared_expert.up_proj",
126
  "model.layers.1.mlp.shared_expert.down_proj",
@@ -129,18 +129,15 @@
129
  "model.layers.2.linear_attn.in_proj_ba",
130
  "model.layers.2.linear_attn.out_proj",
131
  "model.layers.2.mlp.gate",
132
- "model.layers.2.mlp.shared_expert.gate_up_proj",
133
  "model.layers.2.mlp.shared_expert.gate_proj",
134
  "model.layers.2.mlp.shared_expert.up_proj",
135
  "model.layers.2.mlp.shared_expert.down_proj",
136
  "model.layers.2.mlp.shared_expert_gate",
137
- "model.layers.3.self_attn.qkv_proj",
138
  "model.layers.3.self_attn.q_proj",
139
  "model.layers.3.self_attn.k_proj",
140
  "model.layers.3.self_attn.v_proj",
141
  "model.layers.3.self_attn.o_proj",
142
  "model.layers.3.mlp.gate",
143
- "model.layers.3.mlp.shared_expert.gate_up_proj",
144
  "model.layers.3.mlp.shared_expert.gate_proj",
145
  "model.layers.3.mlp.shared_expert.up_proj",
146
  "model.layers.3.mlp.shared_expert.down_proj",
@@ -149,7 +146,6 @@
149
  "model.layers.4.linear_attn.in_proj_ba",
150
  "model.layers.4.linear_attn.out_proj",
151
  "model.layers.4.mlp.gate",
152
- "model.layers.4.mlp.shared_expert.gate_up_proj",
153
  "model.layers.4.mlp.shared_expert.gate_proj",
154
  "model.layers.4.mlp.shared_expert.up_proj",
155
  "model.layers.4.mlp.shared_expert.down_proj",
@@ -158,7 +154,6 @@
158
  "model.layers.5.linear_attn.in_proj_ba",
159
  "model.layers.5.linear_attn.out_proj",
160
  "model.layers.5.mlp.gate",
161
- "model.layers.5.mlp.shared_expert.gate_up_proj",
162
  "model.layers.5.mlp.shared_expert.gate_proj",
163
  "model.layers.5.mlp.shared_expert.up_proj",
164
  "model.layers.5.mlp.shared_expert.down_proj",
@@ -167,18 +162,15 @@
167
  "model.layers.6.linear_attn.in_proj_ba",
168
  "model.layers.6.linear_attn.out_proj",
169
  "model.layers.6.mlp.gate",
170
- "model.layers.6.mlp.shared_expert.gate_up_proj",
171
  "model.layers.6.mlp.shared_expert.gate_proj",
172
  "model.layers.6.mlp.shared_expert.up_proj",
173
  "model.layers.6.mlp.shared_expert.down_proj",
174
  "model.layers.6.mlp.shared_expert_gate",
175
- "model.layers.7.self_attn.qkv_proj",
176
  "model.layers.7.self_attn.q_proj",
177
  "model.layers.7.self_attn.k_proj",
178
  "model.layers.7.self_attn.v_proj",
179
  "model.layers.7.self_attn.o_proj",
180
  "model.layers.7.mlp.gate",
181
- "model.layers.7.mlp.shared_expert.gate_up_proj",
182
  "model.layers.7.mlp.shared_expert.gate_proj",
183
  "model.layers.7.mlp.shared_expert.up_proj",
184
  "model.layers.7.mlp.shared_expert.down_proj",
@@ -187,7 +179,6 @@
187
  "model.layers.8.linear_attn.in_proj_ba",
188
  "model.layers.8.linear_attn.out_proj",
189
  "model.layers.8.mlp.gate",
190
- "model.layers.8.mlp.shared_expert.gate_up_proj",
191
  "model.layers.8.mlp.shared_expert.gate_proj",
192
  "model.layers.8.mlp.shared_expert.up_proj",
193
  "model.layers.8.mlp.shared_expert.down_proj",
@@ -196,7 +187,6 @@
196
  "model.layers.9.linear_attn.in_proj_ba",
197
  "model.layers.9.linear_attn.out_proj",
198
  "model.layers.9.mlp.gate",
199
- "model.layers.9.mlp.shared_expert.gate_up_proj",
200
  "model.layers.9.mlp.shared_expert.gate_proj",
201
  "model.layers.9.mlp.shared_expert.up_proj",
202
  "model.layers.9.mlp.shared_expert.down_proj",
@@ -205,18 +195,15 @@
205
  "model.layers.10.linear_attn.in_proj_ba",
206
  "model.layers.10.linear_attn.out_proj",
207
  "model.layers.10.mlp.gate",
208
- "model.layers.10.mlp.shared_expert.gate_up_proj",
209
  "model.layers.10.mlp.shared_expert.gate_proj",
210
  "model.layers.10.mlp.shared_expert.up_proj",
211
  "model.layers.10.mlp.shared_expert.down_proj",
212
  "model.layers.10.mlp.shared_expert_gate",
213
- "model.layers.11.self_attn.qkv_proj",
214
  "model.layers.11.self_attn.q_proj",
215
  "model.layers.11.self_attn.k_proj",
216
  "model.layers.11.self_attn.v_proj",
217
  "model.layers.11.self_attn.o_proj",
218
  "model.layers.11.mlp.gate",
219
- "model.layers.11.mlp.shared_expert.gate_up_proj",
220
  "model.layers.11.mlp.shared_expert.gate_proj",
221
  "model.layers.11.mlp.shared_expert.up_proj",
222
  "model.layers.11.mlp.shared_expert.down_proj",
@@ -225,7 +212,6 @@
225
  "model.layers.12.linear_attn.in_proj_ba",
226
  "model.layers.12.linear_attn.out_proj",
227
  "model.layers.12.mlp.gate",
228
- "model.layers.12.mlp.shared_expert.gate_up_proj",
229
  "model.layers.12.mlp.shared_expert.gate_proj",
230
  "model.layers.12.mlp.shared_expert.up_proj",
231
  "model.layers.12.mlp.shared_expert.down_proj",
@@ -234,7 +220,6 @@
234
  "model.layers.13.linear_attn.in_proj_ba",
235
  "model.layers.13.linear_attn.out_proj",
236
  "model.layers.13.mlp.gate",
237
- "model.layers.13.mlp.shared_expert.gate_up_proj",
238
  "model.layers.13.mlp.shared_expert.gate_proj",
239
  "model.layers.13.mlp.shared_expert.up_proj",
240
  "model.layers.13.mlp.shared_expert.down_proj",
@@ -243,18 +228,15 @@
243
  "model.layers.14.linear_attn.in_proj_ba",
244
  "model.layers.14.linear_attn.out_proj",
245
  "model.layers.14.mlp.gate",
246
- "model.layers.14.mlp.shared_expert.gate_up_proj",
247
  "model.layers.14.mlp.shared_expert.gate_proj",
248
  "model.layers.14.mlp.shared_expert.up_proj",
249
  "model.layers.14.mlp.shared_expert.down_proj",
250
  "model.layers.14.mlp.shared_expert_gate",
251
- "model.layers.15.self_attn.qkv_proj",
252
  "model.layers.15.self_attn.q_proj",
253
  "model.layers.15.self_attn.k_proj",
254
  "model.layers.15.self_attn.v_proj",
255
  "model.layers.15.self_attn.o_proj",
256
  "model.layers.15.mlp.gate",
257
- "model.layers.15.mlp.shared_expert.gate_up_proj",
258
  "model.layers.15.mlp.shared_expert.gate_proj",
259
  "model.layers.15.mlp.shared_expert.up_proj",
260
  "model.layers.15.mlp.shared_expert.down_proj",
@@ -263,7 +245,6 @@
263
  "model.layers.16.linear_attn.in_proj_ba",
264
  "model.layers.16.linear_attn.out_proj",
265
  "model.layers.16.mlp.gate",
266
- "model.layers.16.mlp.shared_expert.gate_up_proj",
267
  "model.layers.16.mlp.shared_expert.gate_proj",
268
  "model.layers.16.mlp.shared_expert.up_proj",
269
  "model.layers.16.mlp.shared_expert.down_proj",
@@ -272,7 +253,6 @@
272
  "model.layers.17.linear_attn.in_proj_ba",
273
  "model.layers.17.linear_attn.out_proj",
274
  "model.layers.17.mlp.gate",
275
- "model.layers.17.mlp.shared_expert.gate_up_proj",
276
  "model.layers.17.mlp.shared_expert.gate_proj",
277
  "model.layers.17.mlp.shared_expert.up_proj",
278
  "model.layers.17.mlp.shared_expert.down_proj",
@@ -281,18 +261,15 @@
281
  "model.layers.18.linear_attn.in_proj_ba",
282
  "model.layers.18.linear_attn.out_proj",
283
  "model.layers.18.mlp.gate",
284
- "model.layers.18.mlp.shared_expert.gate_up_proj",
285
  "model.layers.18.mlp.shared_expert.gate_proj",
286
  "model.layers.18.mlp.shared_expert.up_proj",
287
  "model.layers.18.mlp.shared_expert.down_proj",
288
  "model.layers.18.mlp.shared_expert_gate",
289
- "model.layers.19.self_attn.qkv_proj",
290
  "model.layers.19.self_attn.q_proj",
291
  "model.layers.19.self_attn.k_proj",
292
  "model.layers.19.self_attn.v_proj",
293
  "model.layers.19.self_attn.o_proj",
294
  "model.layers.19.mlp.gate",
295
- "model.layers.19.mlp.shared_expert.gate_up_proj",
296
  "model.layers.19.mlp.shared_expert.gate_proj",
297
  "model.layers.19.mlp.shared_expert.up_proj",
298
  "model.layers.19.mlp.shared_expert.down_proj",
@@ -301,7 +278,6 @@
301
  "model.layers.20.linear_attn.in_proj_ba",
302
  "model.layers.20.linear_attn.out_proj",
303
  "model.layers.20.mlp.gate",
304
- "model.layers.20.mlp.shared_expert.gate_up_proj",
305
  "model.layers.20.mlp.shared_expert.gate_proj",
306
  "model.layers.20.mlp.shared_expert.up_proj",
307
  "model.layers.20.mlp.shared_expert.down_proj",
@@ -310,7 +286,6 @@
310
  "model.layers.21.linear_attn.in_proj_ba",
311
  "model.layers.21.linear_attn.out_proj",
312
  "model.layers.21.mlp.gate",
313
- "model.layers.21.mlp.shared_expert.gate_up_proj",
314
  "model.layers.21.mlp.shared_expert.gate_proj",
315
  "model.layers.21.mlp.shared_expert.up_proj",
316
  "model.layers.21.mlp.shared_expert.down_proj",
@@ -319,18 +294,15 @@
319
  "model.layers.22.linear_attn.in_proj_ba",
320
  "model.layers.22.linear_attn.out_proj",
321
  "model.layers.22.mlp.gate",
322
- "model.layers.22.mlp.shared_expert.gate_up_proj",
323
  "model.layers.22.mlp.shared_expert.gate_proj",
324
  "model.layers.22.mlp.shared_expert.up_proj",
325
  "model.layers.22.mlp.shared_expert.down_proj",
326
  "model.layers.22.mlp.shared_expert_gate",
327
- "model.layers.23.self_attn.qkv_proj",
328
  "model.layers.23.self_attn.q_proj",
329
  "model.layers.23.self_attn.k_proj",
330
  "model.layers.23.self_attn.v_proj",
331
  "model.layers.23.self_attn.o_proj",
332
  "model.layers.23.mlp.gate",
333
- "model.layers.23.mlp.shared_expert.gate_up_proj",
334
  "model.layers.23.mlp.shared_expert.gate_proj",
335
  "model.layers.23.mlp.shared_expert.up_proj",
336
  "model.layers.23.mlp.shared_expert.down_proj",
@@ -339,7 +311,6 @@
339
  "model.layers.24.linear_attn.in_proj_ba",
340
  "model.layers.24.linear_attn.out_proj",
341
  "model.layers.24.mlp.gate",
342
- "model.layers.24.mlp.shared_expert.gate_up_proj",
343
  "model.layers.24.mlp.shared_expert.gate_proj",
344
  "model.layers.24.mlp.shared_expert.up_proj",
345
  "model.layers.24.mlp.shared_expert.down_proj",
@@ -348,7 +319,6 @@
348
  "model.layers.25.linear_attn.in_proj_ba",
349
  "model.layers.25.linear_attn.out_proj",
350
  "model.layers.25.mlp.gate",
351
- "model.layers.25.mlp.shared_expert.gate_up_proj",
352
  "model.layers.25.mlp.shared_expert.gate_proj",
353
  "model.layers.25.mlp.shared_expert.up_proj",
354
  "model.layers.25.mlp.shared_expert.down_proj",
@@ -357,18 +327,15 @@
357
  "model.layers.26.linear_attn.in_proj_ba",
358
  "model.layers.26.linear_attn.out_proj",
359
  "model.layers.26.mlp.gate",
360
- "model.layers.26.mlp.shared_expert.gate_up_proj",
361
  "model.layers.26.mlp.shared_expert.gate_proj",
362
  "model.layers.26.mlp.shared_expert.up_proj",
363
  "model.layers.26.mlp.shared_expert.down_proj",
364
  "model.layers.26.mlp.shared_expert_gate",
365
- "model.layers.27.self_attn.qkv_proj",
366
  "model.layers.27.self_attn.q_proj",
367
  "model.layers.27.self_attn.k_proj",
368
  "model.layers.27.self_attn.v_proj",
369
  "model.layers.27.self_attn.o_proj",
370
  "model.layers.27.mlp.gate",
371
- "model.layers.27.mlp.shared_expert.gate_up_proj",
372
  "model.layers.27.mlp.shared_expert.gate_proj",
373
  "model.layers.27.mlp.shared_expert.up_proj",
374
  "model.layers.27.mlp.shared_expert.down_proj",
@@ -377,7 +344,6 @@
377
  "model.layers.28.linear_attn.in_proj_ba",
378
  "model.layers.28.linear_attn.out_proj",
379
  "model.layers.28.mlp.gate",
380
- "model.layers.28.mlp.shared_expert.gate_up_proj",
381
  "model.layers.28.mlp.shared_expert.gate_proj",
382
  "model.layers.28.mlp.shared_expert.up_proj",
383
  "model.layers.28.mlp.shared_expert.down_proj",
@@ -386,7 +352,6 @@
386
  "model.layers.29.linear_attn.in_proj_ba",
387
  "model.layers.29.linear_attn.out_proj",
388
  "model.layers.29.mlp.gate",
389
- "model.layers.29.mlp.shared_expert.gate_up_proj",
390
  "model.layers.29.mlp.shared_expert.gate_proj",
391
  "model.layers.29.mlp.shared_expert.up_proj",
392
  "model.layers.29.mlp.shared_expert.down_proj",
@@ -395,18 +360,15 @@
395
  "model.layers.30.linear_attn.in_proj_ba",
396
  "model.layers.30.linear_attn.out_proj",
397
  "model.layers.30.mlp.gate",
398
- "model.layers.30.mlp.shared_expert.gate_up_proj",
399
  "model.layers.30.mlp.shared_expert.gate_proj",
400
  "model.layers.30.mlp.shared_expert.up_proj",
401
  "model.layers.30.mlp.shared_expert.down_proj",
402
  "model.layers.30.mlp.shared_expert_gate",
403
- "model.layers.31.self_attn.qkv_proj",
404
  "model.layers.31.self_attn.q_proj",
405
  "model.layers.31.self_attn.k_proj",
406
  "model.layers.31.self_attn.v_proj",
407
  "model.layers.31.self_attn.o_proj",
408
  "model.layers.31.mlp.gate",
409
- "model.layers.31.mlp.shared_expert.gate_up_proj",
410
  "model.layers.31.mlp.shared_expert.gate_proj",
411
  "model.layers.31.mlp.shared_expert.up_proj",
412
  "model.layers.31.mlp.shared_expert.down_proj",
@@ -415,7 +377,6 @@
415
  "model.layers.32.linear_attn.in_proj_ba",
416
  "model.layers.32.linear_attn.out_proj",
417
  "model.layers.32.mlp.gate",
418
- "model.layers.32.mlp.shared_expert.gate_up_proj",
419
  "model.layers.32.mlp.shared_expert.gate_proj",
420
  "model.layers.32.mlp.shared_expert.up_proj",
421
  "model.layers.32.mlp.shared_expert.down_proj",
@@ -424,7 +385,6 @@
424
  "model.layers.33.linear_attn.in_proj_ba",
425
  "model.layers.33.linear_attn.out_proj",
426
  "model.layers.33.mlp.gate",
427
- "model.layers.33.mlp.shared_expert.gate_up_proj",
428
  "model.layers.33.mlp.shared_expert.gate_proj",
429
  "model.layers.33.mlp.shared_expert.up_proj",
430
  "model.layers.33.mlp.shared_expert.down_proj",
@@ -433,18 +393,15 @@
433
  "model.layers.34.linear_attn.in_proj_ba",
434
  "model.layers.34.linear_attn.out_proj",
435
  "model.layers.34.mlp.gate",
436
- "model.layers.34.mlp.shared_expert.gate_up_proj",
437
  "model.layers.34.mlp.shared_expert.gate_proj",
438
  "model.layers.34.mlp.shared_expert.up_proj",
439
  "model.layers.34.mlp.shared_expert.down_proj",
440
  "model.layers.34.mlp.shared_expert_gate",
441
- "model.layers.35.self_attn.qkv_proj",
442
  "model.layers.35.self_attn.q_proj",
443
  "model.layers.35.self_attn.k_proj",
444
  "model.layers.35.self_attn.v_proj",
445
  "model.layers.35.self_attn.o_proj",
446
  "model.layers.35.mlp.gate",
447
- "model.layers.35.mlp.shared_expert.gate_up_proj",
448
  "model.layers.35.mlp.shared_expert.gate_proj",
449
  "model.layers.35.mlp.shared_expert.up_proj",
450
  "model.layers.35.mlp.shared_expert.down_proj",
@@ -453,7 +410,6 @@
453
  "model.layers.36.linear_attn.in_proj_ba",
454
  "model.layers.36.linear_attn.out_proj",
455
  "model.layers.36.mlp.gate",
456
- "model.layers.36.mlp.shared_expert.gate_up_proj",
457
  "model.layers.36.mlp.shared_expert.gate_proj",
458
  "model.layers.36.mlp.shared_expert.up_proj",
459
  "model.layers.36.mlp.shared_expert.down_proj",
@@ -462,7 +418,6 @@
462
  "model.layers.37.linear_attn.in_proj_ba",
463
  "model.layers.37.linear_attn.out_proj",
464
  "model.layers.37.mlp.gate",
465
- "model.layers.37.mlp.shared_expert.gate_up_proj",
466
  "model.layers.37.mlp.shared_expert.gate_proj",
467
  "model.layers.37.mlp.shared_expert.up_proj",
468
  "model.layers.37.mlp.shared_expert.down_proj",
@@ -471,18 +426,15 @@
471
  "model.layers.38.linear_attn.in_proj_ba",
472
  "model.layers.38.linear_attn.out_proj",
473
  "model.layers.38.mlp.gate",
474
- "model.layers.38.mlp.shared_expert.gate_up_proj",
475
  "model.layers.38.mlp.shared_expert.gate_proj",
476
  "model.layers.38.mlp.shared_expert.up_proj",
477
  "model.layers.38.mlp.shared_expert.down_proj",
478
  "model.layers.38.mlp.shared_expert_gate",
479
- "model.layers.39.self_attn.qkv_proj",
480
  "model.layers.39.self_attn.q_proj",
481
  "model.layers.39.self_attn.k_proj",
482
  "model.layers.39.self_attn.v_proj",
483
  "model.layers.39.self_attn.o_proj",
484
  "model.layers.39.mlp.gate",
485
- "model.layers.39.mlp.shared_expert.gate_up_proj",
486
  "model.layers.39.mlp.shared_expert.gate_proj",
487
  "model.layers.39.mlp.shared_expert.up_proj",
488
  "model.layers.39.mlp.shared_expert.down_proj",
@@ -491,7 +443,6 @@
491
  "model.layers.40.linear_attn.in_proj_ba",
492
  "model.layers.40.linear_attn.out_proj",
493
  "model.layers.40.mlp.gate",
494
- "model.layers.40.mlp.shared_expert.gate_up_proj",
495
  "model.layers.40.mlp.shared_expert.gate_proj",
496
  "model.layers.40.mlp.shared_expert.up_proj",
497
  "model.layers.40.mlp.shared_expert.down_proj",
@@ -500,7 +451,6 @@
500
  "model.layers.41.linear_attn.in_proj_ba",
501
  "model.layers.41.linear_attn.out_proj",
502
  "model.layers.41.mlp.gate",
503
- "model.layers.41.mlp.shared_expert.gate_up_proj",
504
  "model.layers.41.mlp.shared_expert.gate_proj",
505
  "model.layers.41.mlp.shared_expert.up_proj",
506
  "model.layers.41.mlp.shared_expert.down_proj",
@@ -509,18 +459,15 @@
509
  "model.layers.42.linear_attn.in_proj_ba",
510
  "model.layers.42.linear_attn.out_proj",
511
  "model.layers.42.mlp.gate",
512
- "model.layers.42.mlp.shared_expert.gate_up_proj",
513
  "model.layers.42.mlp.shared_expert.gate_proj",
514
  "model.layers.42.mlp.shared_expert.up_proj",
515
  "model.layers.42.mlp.shared_expert.down_proj",
516
  "model.layers.42.mlp.shared_expert_gate",
517
- "model.layers.43.self_attn.qkv_proj",
518
  "model.layers.43.self_attn.q_proj",
519
  "model.layers.43.self_attn.k_proj",
520
  "model.layers.43.self_attn.v_proj",
521
  "model.layers.43.self_attn.o_proj",
522
  "model.layers.43.mlp.gate",
523
- "model.layers.43.mlp.shared_expert.gate_up_proj",
524
  "model.layers.43.mlp.shared_expert.gate_proj",
525
  "model.layers.43.mlp.shared_expert.up_proj",
526
  "model.layers.43.mlp.shared_expert.down_proj",
@@ -529,7 +476,6 @@
529
  "model.layers.44.linear_attn.in_proj_ba",
530
  "model.layers.44.linear_attn.out_proj",
531
  "model.layers.44.mlp.gate",
532
- "model.layers.44.mlp.shared_expert.gate_up_proj",
533
  "model.layers.44.mlp.shared_expert.gate_proj",
534
  "model.layers.44.mlp.shared_expert.up_proj",
535
  "model.layers.44.mlp.shared_expert.down_proj",
@@ -538,7 +484,6 @@
538
  "model.layers.45.linear_attn.in_proj_ba",
539
  "model.layers.45.linear_attn.out_proj",
540
  "model.layers.45.mlp.gate",
541
- "model.layers.45.mlp.shared_expert.gate_up_proj",
542
  "model.layers.45.mlp.shared_expert.gate_proj",
543
  "model.layers.45.mlp.shared_expert.up_proj",
544
  "model.layers.45.mlp.shared_expert.down_proj",
@@ -547,18 +492,15 @@
547
  "model.layers.46.linear_attn.in_proj_ba",
548
  "model.layers.46.linear_attn.out_proj",
549
  "model.layers.46.mlp.gate",
550
- "model.layers.46.mlp.shared_expert.gate_up_proj",
551
  "model.layers.46.mlp.shared_expert.gate_proj",
552
  "model.layers.46.mlp.shared_expert.up_proj",
553
  "model.layers.46.mlp.shared_expert.down_proj",
554
  "model.layers.46.mlp.shared_expert_gate",
555
- "model.layers.47.self_attn.qkv_proj",
556
  "model.layers.47.self_attn.q_proj",
557
  "model.layers.47.self_attn.k_proj",
558
  "model.layers.47.self_attn.v_proj",
559
  "model.layers.47.self_attn.o_proj",
560
  "model.layers.47.mlp.gate",
561
- "model.layers.47.mlp.shared_expert.gate_up_proj",
562
  "model.layers.47.mlp.shared_expert.gate_proj",
563
  "model.layers.47.mlp.shared_expert.up_proj",
564
  "model.layers.47.mlp.shared_expert.down_proj",
@@ -570,7 +512,7 @@
570
  "quantization_status": "compressed",
571
  "sparsity_config": {},
572
  "transform_config": {},
573
- "version": "0.12.3.a20251114"
574
  },
575
  "rms_norm_eps": 1e-06,
576
  "rope_scaling": null,
@@ -578,8 +520,8 @@
578
  "router_aux_loss_coef": 0.001,
579
  "shared_expert_intermediate_size": 512,
580
  "tie_word_embeddings": false,
581
- "transformers_version": "4.57.3",
582
  "use_cache": true,
583
  "use_sliding_window": false,
584
  "vocab_size": 151936
585
- }
 
98
  "num_bits": 4,
99
  "observer": "mse",
100
  "observer_kwargs": {},
101
+ "scale_dtype": null,
102
  "strategy": "group",
103
  "symmetric": true,
104
+ "type": "int",
105
+ "zp_dtype": null
106
  }
107
  }
108
  },
 
113
  "model.layers.0.linear_attn.in_proj_ba",
114
  "model.layers.0.linear_attn.out_proj",
115
  "model.layers.0.mlp.gate",
 
116
  "model.layers.0.mlp.shared_expert.gate_proj",
117
  "model.layers.0.mlp.shared_expert.up_proj",
118
  "model.layers.0.mlp.shared_expert.down_proj",
 
121
  "model.layers.1.linear_attn.in_proj_ba",
122
  "model.layers.1.linear_attn.out_proj",
123
  "model.layers.1.mlp.gate",
 
124
  "model.layers.1.mlp.shared_expert.gate_proj",
125
  "model.layers.1.mlp.shared_expert.up_proj",
126
  "model.layers.1.mlp.shared_expert.down_proj",
 
129
  "model.layers.2.linear_attn.in_proj_ba",
130
  "model.layers.2.linear_attn.out_proj",
131
  "model.layers.2.mlp.gate",
 
132
  "model.layers.2.mlp.shared_expert.gate_proj",
133
  "model.layers.2.mlp.shared_expert.up_proj",
134
  "model.layers.2.mlp.shared_expert.down_proj",
135
  "model.layers.2.mlp.shared_expert_gate",
 
136
  "model.layers.3.self_attn.q_proj",
137
  "model.layers.3.self_attn.k_proj",
138
  "model.layers.3.self_attn.v_proj",
139
  "model.layers.3.self_attn.o_proj",
140
  "model.layers.3.mlp.gate",
 
141
  "model.layers.3.mlp.shared_expert.gate_proj",
142
  "model.layers.3.mlp.shared_expert.up_proj",
143
  "model.layers.3.mlp.shared_expert.down_proj",
 
146
  "model.layers.4.linear_attn.in_proj_ba",
147
  "model.layers.4.linear_attn.out_proj",
148
  "model.layers.4.mlp.gate",
 
149
  "model.layers.4.mlp.shared_expert.gate_proj",
150
  "model.layers.4.mlp.shared_expert.up_proj",
151
  "model.layers.4.mlp.shared_expert.down_proj",
 
154
  "model.layers.5.linear_attn.in_proj_ba",
155
  "model.layers.5.linear_attn.out_proj",
156
  "model.layers.5.mlp.gate",
 
157
  "model.layers.5.mlp.shared_expert.gate_proj",
158
  "model.layers.5.mlp.shared_expert.up_proj",
159
  "model.layers.5.mlp.shared_expert.down_proj",
 
162
  "model.layers.6.linear_attn.in_proj_ba",
163
  "model.layers.6.linear_attn.out_proj",
164
  "model.layers.6.mlp.gate",
 
165
  "model.layers.6.mlp.shared_expert.gate_proj",
166
  "model.layers.6.mlp.shared_expert.up_proj",
167
  "model.layers.6.mlp.shared_expert.down_proj",
168
  "model.layers.6.mlp.shared_expert_gate",
 
169
  "model.layers.7.self_attn.q_proj",
170
  "model.layers.7.self_attn.k_proj",
171
  "model.layers.7.self_attn.v_proj",
172
  "model.layers.7.self_attn.o_proj",
173
  "model.layers.7.mlp.gate",
 
174
  "model.layers.7.mlp.shared_expert.gate_proj",
175
  "model.layers.7.mlp.shared_expert.up_proj",
176
  "model.layers.7.mlp.shared_expert.down_proj",
 
179
  "model.layers.8.linear_attn.in_proj_ba",
180
  "model.layers.8.linear_attn.out_proj",
181
  "model.layers.8.mlp.gate",
 
182
  "model.layers.8.mlp.shared_expert.gate_proj",
183
  "model.layers.8.mlp.shared_expert.up_proj",
184
  "model.layers.8.mlp.shared_expert.down_proj",
 
187
  "model.layers.9.linear_attn.in_proj_ba",
188
  "model.layers.9.linear_attn.out_proj",
189
  "model.layers.9.mlp.gate",
 
190
  "model.layers.9.mlp.shared_expert.gate_proj",
191
  "model.layers.9.mlp.shared_expert.up_proj",
192
  "model.layers.9.mlp.shared_expert.down_proj",
 
195
  "model.layers.10.linear_attn.in_proj_ba",
196
  "model.layers.10.linear_attn.out_proj",
197
  "model.layers.10.mlp.gate",
 
198
  "model.layers.10.mlp.shared_expert.gate_proj",
199
  "model.layers.10.mlp.shared_expert.up_proj",
200
  "model.layers.10.mlp.shared_expert.down_proj",
201
  "model.layers.10.mlp.shared_expert_gate",
 
202
  "model.layers.11.self_attn.q_proj",
203
  "model.layers.11.self_attn.k_proj",
204
  "model.layers.11.self_attn.v_proj",
205
  "model.layers.11.self_attn.o_proj",
206
  "model.layers.11.mlp.gate",
 
207
  "model.layers.11.mlp.shared_expert.gate_proj",
208
  "model.layers.11.mlp.shared_expert.up_proj",
209
  "model.layers.11.mlp.shared_expert.down_proj",
 
212
  "model.layers.12.linear_attn.in_proj_ba",
213
  "model.layers.12.linear_attn.out_proj",
214
  "model.layers.12.mlp.gate",
 
215
  "model.layers.12.mlp.shared_expert.gate_proj",
216
  "model.layers.12.mlp.shared_expert.up_proj",
217
  "model.layers.12.mlp.shared_expert.down_proj",
 
220
  "model.layers.13.linear_attn.in_proj_ba",
221
  "model.layers.13.linear_attn.out_proj",
222
  "model.layers.13.mlp.gate",
 
223
  "model.layers.13.mlp.shared_expert.gate_proj",
224
  "model.layers.13.mlp.shared_expert.up_proj",
225
  "model.layers.13.mlp.shared_expert.down_proj",
 
228
  "model.layers.14.linear_attn.in_proj_ba",
229
  "model.layers.14.linear_attn.out_proj",
230
  "model.layers.14.mlp.gate",
 
231
  "model.layers.14.mlp.shared_expert.gate_proj",
232
  "model.layers.14.mlp.shared_expert.up_proj",
233
  "model.layers.14.mlp.shared_expert.down_proj",
234
  "model.layers.14.mlp.shared_expert_gate",
 
235
  "model.layers.15.self_attn.q_proj",
236
  "model.layers.15.self_attn.k_proj",
237
  "model.layers.15.self_attn.v_proj",
238
  "model.layers.15.self_attn.o_proj",
239
  "model.layers.15.mlp.gate",
 
240
  "model.layers.15.mlp.shared_expert.gate_proj",
241
  "model.layers.15.mlp.shared_expert.up_proj",
242
  "model.layers.15.mlp.shared_expert.down_proj",
 
245
  "model.layers.16.linear_attn.in_proj_ba",
246
  "model.layers.16.linear_attn.out_proj",
247
  "model.layers.16.mlp.gate",
 
248
  "model.layers.16.mlp.shared_expert.gate_proj",
249
  "model.layers.16.mlp.shared_expert.up_proj",
250
  "model.layers.16.mlp.shared_expert.down_proj",
 
253
  "model.layers.17.linear_attn.in_proj_ba",
254
  "model.layers.17.linear_attn.out_proj",
255
  "model.layers.17.mlp.gate",
 
256
  "model.layers.17.mlp.shared_expert.gate_proj",
257
  "model.layers.17.mlp.shared_expert.up_proj",
258
  "model.layers.17.mlp.shared_expert.down_proj",
 
261
  "model.layers.18.linear_attn.in_proj_ba",
262
  "model.layers.18.linear_attn.out_proj",
263
  "model.layers.18.mlp.gate",
 
264
  "model.layers.18.mlp.shared_expert.gate_proj",
265
  "model.layers.18.mlp.shared_expert.up_proj",
266
  "model.layers.18.mlp.shared_expert.down_proj",
267
  "model.layers.18.mlp.shared_expert_gate",
 
268
  "model.layers.19.self_attn.q_proj",
269
  "model.layers.19.self_attn.k_proj",
270
  "model.layers.19.self_attn.v_proj",
271
  "model.layers.19.self_attn.o_proj",
272
  "model.layers.19.mlp.gate",
 
273
  "model.layers.19.mlp.shared_expert.gate_proj",
274
  "model.layers.19.mlp.shared_expert.up_proj",
275
  "model.layers.19.mlp.shared_expert.down_proj",
 
278
  "model.layers.20.linear_attn.in_proj_ba",
279
  "model.layers.20.linear_attn.out_proj",
280
  "model.layers.20.mlp.gate",
 
281
  "model.layers.20.mlp.shared_expert.gate_proj",
282
  "model.layers.20.mlp.shared_expert.up_proj",
283
  "model.layers.20.mlp.shared_expert.down_proj",
 
286
  "model.layers.21.linear_attn.in_proj_ba",
287
  "model.layers.21.linear_attn.out_proj",
288
  "model.layers.21.mlp.gate",
 
289
  "model.layers.21.mlp.shared_expert.gate_proj",
290
  "model.layers.21.mlp.shared_expert.up_proj",
291
  "model.layers.21.mlp.shared_expert.down_proj",
 
294
  "model.layers.22.linear_attn.in_proj_ba",
295
  "model.layers.22.linear_attn.out_proj",
296
  "model.layers.22.mlp.gate",
 
297
  "model.layers.22.mlp.shared_expert.gate_proj",
298
  "model.layers.22.mlp.shared_expert.up_proj",
299
  "model.layers.22.mlp.shared_expert.down_proj",
300
  "model.layers.22.mlp.shared_expert_gate",
 
301
  "model.layers.23.self_attn.q_proj",
302
  "model.layers.23.self_attn.k_proj",
303
  "model.layers.23.self_attn.v_proj",
304
  "model.layers.23.self_attn.o_proj",
305
  "model.layers.23.mlp.gate",
 
306
  "model.layers.23.mlp.shared_expert.gate_proj",
307
  "model.layers.23.mlp.shared_expert.up_proj",
308
  "model.layers.23.mlp.shared_expert.down_proj",
 
311
  "model.layers.24.linear_attn.in_proj_ba",
312
  "model.layers.24.linear_attn.out_proj",
313
  "model.layers.24.mlp.gate",
 
314
  "model.layers.24.mlp.shared_expert.gate_proj",
315
  "model.layers.24.mlp.shared_expert.up_proj",
316
  "model.layers.24.mlp.shared_expert.down_proj",
 
319
  "model.layers.25.linear_attn.in_proj_ba",
320
  "model.layers.25.linear_attn.out_proj",
321
  "model.layers.25.mlp.gate",
 
322
  "model.layers.25.mlp.shared_expert.gate_proj",
323
  "model.layers.25.mlp.shared_expert.up_proj",
324
  "model.layers.25.mlp.shared_expert.down_proj",
 
327
  "model.layers.26.linear_attn.in_proj_ba",
328
  "model.layers.26.linear_attn.out_proj",
329
  "model.layers.26.mlp.gate",
 
330
  "model.layers.26.mlp.shared_expert.gate_proj",
331
  "model.layers.26.mlp.shared_expert.up_proj",
332
  "model.layers.26.mlp.shared_expert.down_proj",
333
  "model.layers.26.mlp.shared_expert_gate",
 
334
  "model.layers.27.self_attn.q_proj",
335
  "model.layers.27.self_attn.k_proj",
336
  "model.layers.27.self_attn.v_proj",
337
  "model.layers.27.self_attn.o_proj",
338
  "model.layers.27.mlp.gate",
 
339
  "model.layers.27.mlp.shared_expert.gate_proj",
340
  "model.layers.27.mlp.shared_expert.up_proj",
341
  "model.layers.27.mlp.shared_expert.down_proj",
 
344
  "model.layers.28.linear_attn.in_proj_ba",
345
  "model.layers.28.linear_attn.out_proj",
346
  "model.layers.28.mlp.gate",
 
347
  "model.layers.28.mlp.shared_expert.gate_proj",
348
  "model.layers.28.mlp.shared_expert.up_proj",
349
  "model.layers.28.mlp.shared_expert.down_proj",
 
352
  "model.layers.29.linear_attn.in_proj_ba",
353
  "model.layers.29.linear_attn.out_proj",
354
  "model.layers.29.mlp.gate",
 
355
  "model.layers.29.mlp.shared_expert.gate_proj",
356
  "model.layers.29.mlp.shared_expert.up_proj",
357
  "model.layers.29.mlp.shared_expert.down_proj",
 
360
  "model.layers.30.linear_attn.in_proj_ba",
361
  "model.layers.30.linear_attn.out_proj",
362
  "model.layers.30.mlp.gate",
 
363
  "model.layers.30.mlp.shared_expert.gate_proj",
364
  "model.layers.30.mlp.shared_expert.up_proj",
365
  "model.layers.30.mlp.shared_expert.down_proj",
366
  "model.layers.30.mlp.shared_expert_gate",
 
367
  "model.layers.31.self_attn.q_proj",
368
  "model.layers.31.self_attn.k_proj",
369
  "model.layers.31.self_attn.v_proj",
370
  "model.layers.31.self_attn.o_proj",
371
  "model.layers.31.mlp.gate",
 
372
  "model.layers.31.mlp.shared_expert.gate_proj",
373
  "model.layers.31.mlp.shared_expert.up_proj",
374
  "model.layers.31.mlp.shared_expert.down_proj",
 
377
  "model.layers.32.linear_attn.in_proj_ba",
378
  "model.layers.32.linear_attn.out_proj",
379
  "model.layers.32.mlp.gate",
 
380
  "model.layers.32.mlp.shared_expert.gate_proj",
381
  "model.layers.32.mlp.shared_expert.up_proj",
382
  "model.layers.32.mlp.shared_expert.down_proj",
 
385
  "model.layers.33.linear_attn.in_proj_ba",
386
  "model.layers.33.linear_attn.out_proj",
387
  "model.layers.33.mlp.gate",
 
388
  "model.layers.33.mlp.shared_expert.gate_proj",
389
  "model.layers.33.mlp.shared_expert.up_proj",
390
  "model.layers.33.mlp.shared_expert.down_proj",
 
393
  "model.layers.34.linear_attn.in_proj_ba",
394
  "model.layers.34.linear_attn.out_proj",
395
  "model.layers.34.mlp.gate",
 
396
  "model.layers.34.mlp.shared_expert.gate_proj",
397
  "model.layers.34.mlp.shared_expert.up_proj",
398
  "model.layers.34.mlp.shared_expert.down_proj",
399
  "model.layers.34.mlp.shared_expert_gate",
 
400
  "model.layers.35.self_attn.q_proj",
401
  "model.layers.35.self_attn.k_proj",
402
  "model.layers.35.self_attn.v_proj",
403
  "model.layers.35.self_attn.o_proj",
404
  "model.layers.35.mlp.gate",
 
405
  "model.layers.35.mlp.shared_expert.gate_proj",
406
  "model.layers.35.mlp.shared_expert.up_proj",
407
  "model.layers.35.mlp.shared_expert.down_proj",
 
410
  "model.layers.36.linear_attn.in_proj_ba",
411
  "model.layers.36.linear_attn.out_proj",
412
  "model.layers.36.mlp.gate",
 
413
  "model.layers.36.mlp.shared_expert.gate_proj",
414
  "model.layers.36.mlp.shared_expert.up_proj",
415
  "model.layers.36.mlp.shared_expert.down_proj",
 
418
  "model.layers.37.linear_attn.in_proj_ba",
419
  "model.layers.37.linear_attn.out_proj",
420
  "model.layers.37.mlp.gate",
 
421
  "model.layers.37.mlp.shared_expert.gate_proj",
422
  "model.layers.37.mlp.shared_expert.up_proj",
423
  "model.layers.37.mlp.shared_expert.down_proj",
 
426
  "model.layers.38.linear_attn.in_proj_ba",
427
  "model.layers.38.linear_attn.out_proj",
428
  "model.layers.38.mlp.gate",
 
429
  "model.layers.38.mlp.shared_expert.gate_proj",
430
  "model.layers.38.mlp.shared_expert.up_proj",
431
  "model.layers.38.mlp.shared_expert.down_proj",
432
  "model.layers.38.mlp.shared_expert_gate",
 
433
  "model.layers.39.self_attn.q_proj",
434
  "model.layers.39.self_attn.k_proj",
435
  "model.layers.39.self_attn.v_proj",
436
  "model.layers.39.self_attn.o_proj",
437
  "model.layers.39.mlp.gate",
 
438
  "model.layers.39.mlp.shared_expert.gate_proj",
439
  "model.layers.39.mlp.shared_expert.up_proj",
440
  "model.layers.39.mlp.shared_expert.down_proj",
 
443
  "model.layers.40.linear_attn.in_proj_ba",
444
  "model.layers.40.linear_attn.out_proj",
445
  "model.layers.40.mlp.gate",
 
446
  "model.layers.40.mlp.shared_expert.gate_proj",
447
  "model.layers.40.mlp.shared_expert.up_proj",
448
  "model.layers.40.mlp.shared_expert.down_proj",
 
451
  "model.layers.41.linear_attn.in_proj_ba",
452
  "model.layers.41.linear_attn.out_proj",
453
  "model.layers.41.mlp.gate",
 
454
  "model.layers.41.mlp.shared_expert.gate_proj",
455
  "model.layers.41.mlp.shared_expert.up_proj",
456
  "model.layers.41.mlp.shared_expert.down_proj",
 
459
  "model.layers.42.linear_attn.in_proj_ba",
460
  "model.layers.42.linear_attn.out_proj",
461
  "model.layers.42.mlp.gate",
 
462
  "model.layers.42.mlp.shared_expert.gate_proj",
463
  "model.layers.42.mlp.shared_expert.up_proj",
464
  "model.layers.42.mlp.shared_expert.down_proj",
465
  "model.layers.42.mlp.shared_expert_gate",
 
466
  "model.layers.43.self_attn.q_proj",
467
  "model.layers.43.self_attn.k_proj",
468
  "model.layers.43.self_attn.v_proj",
469
  "model.layers.43.self_attn.o_proj",
470
  "model.layers.43.mlp.gate",
 
471
  "model.layers.43.mlp.shared_expert.gate_proj",
472
  "model.layers.43.mlp.shared_expert.up_proj",
473
  "model.layers.43.mlp.shared_expert.down_proj",
 
476
  "model.layers.44.linear_attn.in_proj_ba",
477
  "model.layers.44.linear_attn.out_proj",
478
  "model.layers.44.mlp.gate",
 
479
  "model.layers.44.mlp.shared_expert.gate_proj",
480
  "model.layers.44.mlp.shared_expert.up_proj",
481
  "model.layers.44.mlp.shared_expert.down_proj",
 
484
  "model.layers.45.linear_attn.in_proj_ba",
485
  "model.layers.45.linear_attn.out_proj",
486
  "model.layers.45.mlp.gate",
 
487
  "model.layers.45.mlp.shared_expert.gate_proj",
488
  "model.layers.45.mlp.shared_expert.up_proj",
489
  "model.layers.45.mlp.shared_expert.down_proj",
 
492
  "model.layers.46.linear_attn.in_proj_ba",
493
  "model.layers.46.linear_attn.out_proj",
494
  "model.layers.46.mlp.gate",
 
495
  "model.layers.46.mlp.shared_expert.gate_proj",
496
  "model.layers.46.mlp.shared_expert.up_proj",
497
  "model.layers.46.mlp.shared_expert.down_proj",
498
  "model.layers.46.mlp.shared_expert_gate",
 
499
  "model.layers.47.self_attn.q_proj",
500
  "model.layers.47.self_attn.k_proj",
501
  "model.layers.47.self_attn.v_proj",
502
  "model.layers.47.self_attn.o_proj",
503
  "model.layers.47.mlp.gate",
 
504
  "model.layers.47.mlp.shared_expert.gate_proj",
505
  "model.layers.47.mlp.shared_expert.up_proj",
506
  "model.layers.47.mlp.shared_expert.down_proj",
 
512
  "quantization_status": "compressed",
513
  "sparsity_config": {},
514
  "transform_config": {},
515
+ "version": "0.14.1.a20260320"
516
  },
517
  "rms_norm_eps": 1e-06,
518
  "rope_scaling": null,
 
520
  "router_aux_loss_coef": 0.001,
521
  "shared_expert_intermediate_size": 512,
522
  "tie_word_embeddings": false,
523
+ "transformers_version": "4.57.6",
524
  "use_cache": true,
525
  "use_sliding_window": false,
526
  "vocab_size": 151936
527
+ }
model-00001-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f1bf29ad2b51bc3a992bb633305fbbc35bfc7a19d8324dce0b460907a0206cd
3
- size 5002226064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38346174c2fa8ef4a39bf6aad5ae43c3f098a14c92300834fde6d6e0ee9cff9d
3
+ size 5002291600
model-00002-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd2520c7032699794c4ae513798fb04caeae32a53dd3b1291410f8be0efa91c6
3
- size 5002906536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85ee28f274855d20371d07655b77b821b1524c66f1270dd226615e5e241ccd89
3
+ size 5002906672
model-00003-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cda4dcded091b086486f276ccb2db651cbd9f646626976403e6b294324fc4bc9
3
- size 5002469472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b0825db924deaa583ca25c4c95f777ae98dc9522a39478fd024e949cf475a8
3
+ size 5002469336
model-00004-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fad74aeb225703cc4f3e4f45fc212e141c9bac69d55f9cfbc5f76becb502920
3
  size 5002480424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8ed95928c07be635a501a977aa0d6da7c3b458719368303fee4aae915a8895
3
  size 5002480424
model-00005-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8862f785e8504de2e3e880e98f56c5e3b1880517b939fc0150f38cfb2faef162
3
- size 5002930176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67ba44e47240e145e8fbfc857a504df0c75301f617a2cee02e38692d032f38ab
3
+ size 5002930304
model-00006-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6ef760529dedc341ed3d627ad84208c9b7d9b0125c02667546cc8a2435926b0
3
- size 4991135440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532b7d3e9be96cdfe6398194be73340524224a1c02370ee24430e392b37bdc48
3
+ size 4991069776
model-00007-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b175fc6655e94593f8aa41370ca3ce284a3eb80c57048ff8fb6f45ebaa58a0be
3
- size 5002894032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ede3407647ee761e9e2f2cd6e7df224ee0ebd0a89c5c1a010495a257c7641aa
3
+ size 5002893904
model-00008-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ae4bea2067673d59efd9a6ccd1bed5351bd60c230b3e552dd15e52e9bffd43f
3
- size 5002929656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93cb84f225732180ff1b75d3cd2190c466e88424986d12e4bbb37b9677fbe684
3
+ size 5002470896
model-00009-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b117abba3c0ef744a40f3c5102720eb31680ddd72c423358f4dd88043a29f69
3
- size 5002471416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2065d4eb0add6733cdc1cfb162a281a61232e3877c60b09724da6f880ed0edf9
3
+ size 5002930168
model-00010-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64e087c100efbdf0890e7d19cbd7e515c406e9a8abe8dc27138f6dfe2f7d8b53
3
- size 3232720920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:befce0f17c85de8c9dc9f4fead748533b8d7d77225e08872fa260fb3715d17f9
3
+ size 3232721048
model.safetensors.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de3074ca86e26d7798358028f7e09b856066ab3b0f6b76ce120b209dc1cd25d
3
  size 21565735
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1676e69c5ebbbba8b3ee983adf6fc469dc9afd976ff2e69a5cd49f563b5dc711
3
  size 21565735
recipe.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ AWQModifier:
4
+ config_groups:
5
+ group_0:
6
+ targets: [Linear]
7
+ weights:
8
+ num_bits: 4
9
+ type: int
10
+ symmetric: true
11
+ group_size: 32
12
+ strategy: group
13
+ block_structure: null
14
+ dynamic: false
15
+ actorder: null
16
+ scale_dtype: null
17
+ zp_dtype: null
18
+ observer: mse
19
+ observer_kwargs: {}
20
+ input_activations: null
21
+ output_activations: null
22
+ format: null
23
+ targets: [Linear]
24
+ ignore: [model.embed_tokens, 're:.*linear_attn.*', 're:.*norm.*', 're:.*shared_expert.*',
25
+ 're:.*shared_expert_gate$', 're:.*mlp[.]gate$', 're:.*router.*', 're:.*self_attn.*',
26
+ lm_head]
27
+ bypass_divisibility_checks: false
28
+ mappings:
29
+ - smooth_layer: re:.*post_attention_layernorm
30
+ balance_layers: ['re:.*mlp.gate', 're:.*mlp.shared_expert.gate_proj', 're:.*mlp.shared_expert.up_proj',
31
+ 're:.*mlp.shared_expert_gate', 're:.*mlp.experts.*gate_proj', 're:.*mlp.experts.*up_proj']
32
+ activation_hook_target: null
33
+ balance_exponent: 1
34
+ - smooth_layer: re:.*mlp.experts.*up_proj
35
+ balance_layers: ['re:.*mlp.experts.*down_proj']
36
+ activation_hook_target: null
37
+ balance_exponent: 1
38
+ offload_device: !!python/object/apply:torch.device [cpu]
39
+ duo_scaling: true
40
+ n_grid: 20
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }