PKSGIN committed on
Commit
792236b
·
verified ·
1 Parent(s): ae1ec15

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3MoeForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 48,
17
+ "mlp_only_layers": [],
18
+ "model_type": "qwen3_moe",
19
+ "moe_intermediate_size": 768,
20
+ "norm_topk_prob": true,
21
+ "num_attention_heads": 32,
22
+ "num_experts": 128,
23
+ "num_experts_per_tok": 8,
24
+ "num_hidden_layers": 48,
25
+ "num_key_value_heads": 4,
26
+ "output_router_logits": false,
27
+ "rms_norm_eps": 1e-06,
28
+ "rope_scaling": null,
29
+ "rope_theta": 1000000.0,
30
+ "router_aux_loss_coef": 0.001,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.51.0",
35
+ "use_cache": true,
36
+ "use_sliding_window": false,
37
+ "vocab_size": 151936,
38
+ "quantization": {
39
+ "group_size": 64,
40
+ "bits": 4
41
+ }
42
+ }
expert_profile.json ADDED
@@ -0,0 +1,910 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "Qwen/Qwen3-30B-A3B",
3
+ "num_experts": 128,
4
+ "num_moe_layers": 48,
5
+ "top_k": 8,
6
+ "coding_samples": 200,
7
+ "general_samples": 200,
8
+ "coding_prompt_count": 25,
9
+ "general_prompt_count": 15,
10
+ "profiled_with": "pytorch",
11
+ "gpu": "NVIDIA A100-SXM4-80GB",
12
+ "expert_stats": {
13
+ "0": {
14
+ "coding_freq": 0.092768,
15
+ "general_freq": 0.061267,
16
+ "coding_ratio": 0.6023,
17
+ "total_activations_coding": 133728,
18
+ "total_activations_general": 85922
19
+ },
20
+ "1": {
21
+ "coding_freq": 0.040346,
22
+ "general_freq": 0.046556,
23
+ "coding_ratio": 0.4643,
24
+ "total_activations_coding": 58160,
25
+ "total_activations_general": 65291
26
+ },
27
+ "2": {
28
+ "coding_freq": 0.062112,
29
+ "general_freq": 0.064141,
30
+ "coding_ratio": 0.492,
31
+ "total_activations_coding": 89536,
32
+ "total_activations_general": 89953
33
+ },
34
+ "3": {
35
+ "coding_freq": 0.052794,
36
+ "general_freq": 0.092041,
37
+ "coding_ratio": 0.3645,
38
+ "total_activations_coding": 76104,
39
+ "total_activations_general": 129080
40
+ },
41
+ "4": {
42
+ "coding_freq": 0.043476,
43
+ "general_freq": 0.047703,
44
+ "coding_ratio": 0.4768,
45
+ "total_activations_coding": 62672,
46
+ "total_activations_general": 66899
47
+ },
48
+ "5": {
49
+ "coding_freq": 0.038859,
50
+ "general_freq": 0.037726,
51
+ "coding_ratio": 0.5074,
52
+ "total_activations_coding": 56016,
53
+ "total_activations_general": 52907
54
+ },
55
+ "6": {
56
+ "coding_freq": 0.050241,
57
+ "general_freq": 0.130322,
58
+ "coding_ratio": 0.2782,
59
+ "total_activations_coding": 72424,
60
+ "total_activations_general": 182766
61
+ },
62
+ "7": {
63
+ "coding_freq": 0.074249,
64
+ "general_freq": 0.052861,
65
+ "coding_ratio": 0.5841,
66
+ "total_activations_coding": 107032,
67
+ "total_activations_general": 74133
68
+ },
69
+ "8": {
70
+ "coding_freq": 0.081097,
71
+ "general_freq": 0.092035,
72
+ "coding_ratio": 0.4684,
73
+ "total_activations_coding": 116904,
74
+ "total_activations_general": 129072
75
+ },
76
+ "9": {
77
+ "coding_freq": 0.079199,
78
+ "general_freq": 0.067423,
79
+ "coding_ratio": 0.5402,
80
+ "total_activations_coding": 114168,
81
+ "total_activations_general": 94555
82
+ },
83
+ "10": {
84
+ "coding_freq": 0.063554,
85
+ "general_freq": 0.083043,
86
+ "coding_ratio": 0.4335,
87
+ "total_activations_coding": 91616,
88
+ "total_activations_general": 116461
89
+ },
90
+ "11": {
91
+ "coding_freq": 0.034363,
92
+ "general_freq": 0.034016,
93
+ "coding_ratio": 0.5025,
94
+ "total_activations_coding": 49536,
95
+ "total_activations_general": 47704
96
+ },
97
+ "12": {
98
+ "coding_freq": 0.037027,
99
+ "general_freq": 0.051906,
100
+ "coding_ratio": 0.4163,
101
+ "total_activations_coding": 53376,
102
+ "total_activations_general": 72794
103
+ },
104
+ "13": {
105
+ "coding_freq": 0.049547,
106
+ "general_freq": 0.056121,
107
+ "coding_ratio": 0.4689,
108
+ "total_activations_coding": 71424,
109
+ "total_activations_general": 78705
110
+ },
111
+ "14": {
112
+ "coding_freq": 0.063754,
113
+ "general_freq": 0.07474,
114
+ "coding_ratio": 0.4603,
115
+ "total_activations_coding": 91904,
116
+ "total_activations_general": 104817
117
+ },
118
+ "15": {
119
+ "coding_freq": 0.06013,
120
+ "general_freq": 0.061003,
121
+ "coding_ratio": 0.4964,
122
+ "total_activations_coding": 86680,
123
+ "total_activations_general": 85551
124
+ },
125
+ "16": {
126
+ "coding_freq": 0.063516,
127
+ "general_freq": 0.07144,
128
+ "coding_ratio": 0.4706,
129
+ "total_activations_coding": 91560,
130
+ "total_activations_general": 100188
131
+ },
132
+ "17": {
133
+ "coding_freq": 0.061457,
134
+ "general_freq": 0.055548,
135
+ "coding_ratio": 0.5252,
136
+ "total_activations_coding": 88592,
137
+ "total_activations_general": 77902
138
+ },
139
+ "18": {
140
+ "coding_freq": 0.065613,
141
+ "general_freq": 0.093508,
142
+ "coding_ratio": 0.4123,
143
+ "total_activations_coding": 94584,
144
+ "total_activations_general": 131137
145
+ },
146
+ "19": {
147
+ "coding_freq": 0.056562,
148
+ "general_freq": 0.040124,
149
+ "coding_ratio": 0.585,
150
+ "total_activations_coding": 81536,
151
+ "total_activations_general": 56270
152
+ },
153
+ "20": {
154
+ "coding_freq": 0.077861,
155
+ "general_freq": 0.062172,
156
+ "coding_ratio": 0.556,
157
+ "total_activations_coding": 112240,
158
+ "total_activations_general": 87191
159
+ },
160
+ "21": {
161
+ "coding_freq": 0.113773,
162
+ "general_freq": 0.0502,
163
+ "coding_ratio": 0.6939,
164
+ "total_activations_coding": 164008,
165
+ "total_activations_general": 70401
166
+ },
167
+ "22": {
168
+ "coding_freq": 0.037138,
169
+ "general_freq": 0.065625,
170
+ "coding_ratio": 0.3614,
171
+ "total_activations_coding": 53536,
172
+ "total_activations_general": 92034
173
+ },
174
+ "23": {
175
+ "coding_freq": 0.075519,
176
+ "general_freq": 0.067415,
177
+ "coding_ratio": 0.5283,
178
+ "total_activations_coding": 108864,
179
+ "total_activations_general": 94544
180
+ },
181
+ "24": {
182
+ "coding_freq": 0.050196,
183
+ "general_freq": 0.067031,
184
+ "coding_ratio": 0.4282,
185
+ "total_activations_coding": 72360,
186
+ "total_activations_general": 94005
187
+ },
188
+ "25": {
189
+ "coding_freq": 0.064698,
190
+ "general_freq": 0.077517,
191
+ "coding_ratio": 0.4549,
192
+ "total_activations_coding": 93264,
193
+ "total_activations_general": 108711
194
+ },
195
+ "26": {
196
+ "coding_freq": 0.055435,
197
+ "general_freq": 0.072553,
198
+ "coding_ratio": 0.4331,
199
+ "total_activations_coding": 79912,
200
+ "total_activations_general": 101750
201
+ },
202
+ "27": {
203
+ "coding_freq": 0.067977,
204
+ "general_freq": 0.031524,
205
+ "coding_ratio": 0.6832,
206
+ "total_activations_coding": 97992,
207
+ "total_activations_general": 44210
208
+ },
209
+ "28": {
210
+ "coding_freq": 0.05866,
211
+ "general_freq": 0.054877,
212
+ "coding_ratio": 0.5167,
213
+ "total_activations_coding": 84560,
214
+ "total_activations_general": 76961
215
+ },
216
+ "29": {
217
+ "coding_freq": 0.055485,
218
+ "general_freq": 0.038545,
219
+ "coding_ratio": 0.5901,
220
+ "total_activations_coding": 79984,
221
+ "total_activations_general": 54056
222
+ },
223
+ "30": {
224
+ "coding_freq": 0.070414,
225
+ "general_freq": 0.057319,
226
+ "coding_ratio": 0.5513,
227
+ "total_activations_coding": 101504,
228
+ "total_activations_general": 80385
229
+ },
230
+ "31": {
231
+ "coding_freq": 0.07305,
232
+ "general_freq": 0.038923,
233
+ "coding_ratio": 0.6524,
234
+ "total_activations_coding": 105304,
235
+ "total_activations_general": 54586
236
+ },
237
+ "32": {
238
+ "coding_freq": 0.063504,
239
+ "general_freq": 0.051906,
240
+ "coding_ratio": 0.5502,
241
+ "total_activations_coding": 91544,
242
+ "total_activations_general": 72794
243
+ },
244
+ "33": {
245
+ "coding_freq": 0.022676,
246
+ "general_freq": 0.035744,
247
+ "coding_ratio": 0.3882,
248
+ "total_activations_coding": 32688,
249
+ "total_activations_general": 50128
250
+ },
251
+ "34": {
252
+ "coding_freq": 0.09092,
253
+ "general_freq": 0.079021,
254
+ "coding_ratio": 0.535,
255
+ "total_activations_coding": 131064,
256
+ "total_activations_general": 110821
257
+ },
258
+ "35": {
259
+ "coding_freq": 0.058915,
260
+ "general_freq": 0.05607,
261
+ "coding_ratio": 0.5124,
262
+ "total_activations_coding": 84928,
263
+ "total_activations_general": 78633
264
+ },
265
+ "36": {
266
+ "coding_freq": 0.079537,
267
+ "general_freq": 0.051724,
268
+ "coding_ratio": 0.6059,
269
+ "total_activations_coding": 114656,
270
+ "total_activations_general": 72538
271
+ },
272
+ "37": {
273
+ "coding_freq": 0.059781,
274
+ "general_freq": 0.072444,
275
+ "coding_ratio": 0.4521,
276
+ "total_activations_coding": 86176,
277
+ "total_activations_general": 101596
278
+ },
279
+ "38": {
280
+ "coding_freq": 0.043265,
281
+ "general_freq": 0.05503,
282
+ "coding_ratio": 0.4402,
283
+ "total_activations_coding": 62368,
284
+ "total_activations_general": 77175
285
+ },
286
+ "39": {
287
+ "coding_freq": 0.052982,
288
+ "general_freq": 0.082727,
289
+ "coding_ratio": 0.3904,
290
+ "total_activations_coding": 76376,
291
+ "total_activations_general": 116017
292
+ },
293
+ "40": {
294
+ "coding_freq": 0.064809,
295
+ "general_freq": 0.09718,
296
+ "coding_ratio": 0.4001,
297
+ "total_activations_coding": 93424,
298
+ "total_activations_general": 136287
299
+ },
300
+ "41": {
301
+ "coding_freq": 0.043165,
302
+ "general_freq": 0.040845,
303
+ "coding_ratio": 0.5138,
304
+ "total_activations_coding": 62224,
305
+ "total_activations_general": 57282
306
+ },
307
+ "42": {
308
+ "coding_freq": 0.058343,
309
+ "general_freq": 0.068399,
310
+ "coding_ratio": 0.4603,
311
+ "total_activations_coding": 84104,
312
+ "total_activations_general": 95924
313
+ },
314
+ "43": {
315
+ "coding_freq": 0.089488,
316
+ "general_freq": 0.042612,
317
+ "coding_ratio": 0.6774,
318
+ "total_activations_coding": 129000,
319
+ "total_activations_general": 59760
320
+ },
321
+ "44": {
322
+ "coding_freq": 0.050491,
323
+ "general_freq": 0.059014,
324
+ "coding_ratio": 0.4611,
325
+ "total_activations_coding": 72784,
326
+ "total_activations_general": 82762
327
+ },
328
+ "45": {
329
+ "coding_freq": 0.080237,
330
+ "general_freq": 0.061308,
331
+ "coding_ratio": 0.5669,
332
+ "total_activations_coding": 115664,
333
+ "total_activations_general": 85979
334
+ },
335
+ "46": {
336
+ "coding_freq": 0.058649,
337
+ "general_freq": 0.057986,
338
+ "coding_ratio": 0.5028,
339
+ "total_activations_coding": 84544,
340
+ "total_activations_general": 81321
341
+ },
342
+ "47": {
343
+ "coding_freq": 0.071085,
344
+ "general_freq": 0.070113,
345
+ "coding_ratio": 0.5034,
346
+ "total_activations_coding": 102472,
347
+ "total_activations_general": 98327
348
+ },
349
+ "48": {
350
+ "coding_freq": 0.048848,
351
+ "general_freq": 0.053544,
352
+ "coding_ratio": 0.4771,
353
+ "total_activations_coding": 70416,
354
+ "total_activations_general": 75091
355
+ },
356
+ "49": {
357
+ "coding_freq": 0.064737,
358
+ "general_freq": 0.087229,
359
+ "coding_ratio": 0.426,
360
+ "total_activations_coding": 93320,
361
+ "total_activations_general": 122332
362
+ },
363
+ "50": {
364
+ "coding_freq": 0.049791,
365
+ "general_freq": 0.056128,
366
+ "coding_ratio": 0.4701,
367
+ "total_activations_coding": 71776,
368
+ "total_activations_general": 78715
369
+ },
370
+ "51": {
371
+ "coding_freq": 0.088395,
372
+ "general_freq": 0.097773,
373
+ "coding_ratio": 0.4748,
374
+ "total_activations_coding": 127424,
375
+ "total_activations_general": 137118
376
+ },
377
+ "52": {
378
+ "coding_freq": 0.05382,
379
+ "general_freq": 0.080866,
380
+ "coding_ratio": 0.3996,
381
+ "total_activations_coding": 77584,
382
+ "total_activations_general": 113408
383
+ },
384
+ "53": {
385
+ "coding_freq": 0.05639,
386
+ "general_freq": 0.072268,
387
+ "coding_ratio": 0.4383,
388
+ "total_activations_coding": 81288,
389
+ "total_activations_general": 101350
390
+ },
391
+ "54": {
392
+ "coding_freq": 0.077334,
393
+ "general_freq": 0.047876,
394
+ "coding_ratio": 0.6176,
395
+ "total_activations_coding": 111480,
396
+ "total_activations_general": 67142
397
+ },
398
+ "55": {
399
+ "coding_freq": 0.05735,
400
+ "general_freq": 0.062076,
401
+ "coding_ratio": 0.4802,
402
+ "total_activations_coding": 82672,
403
+ "total_activations_general": 87056
404
+ },
405
+ "56": {
406
+ "coding_freq": 0.066768,
407
+ "general_freq": 0.081151,
408
+ "coding_ratio": 0.4514,
409
+ "total_activations_coding": 96248,
410
+ "total_activations_general": 113807
411
+ },
412
+ "57": {
413
+ "coding_freq": 0.035218,
414
+ "general_freq": 0.060223,
415
+ "coding_ratio": 0.369,
416
+ "total_activations_coding": 50768,
417
+ "total_activations_general": 84457
418
+ },
419
+ "58": {
420
+ "coding_freq": 0.052361,
421
+ "general_freq": 0.064346,
422
+ "coding_ratio": 0.4487,
423
+ "total_activations_coding": 75480,
424
+ "total_activations_general": 90240
425
+ },
426
+ "59": {
427
+ "coding_freq": 0.100398,
428
+ "general_freq": 0.037102,
429
+ "coding_ratio": 0.7302,
430
+ "total_activations_coding": 144728,
431
+ "total_activations_general": 52033
432
+ },
433
+ "60": {
434
+ "coding_freq": 0.070292,
435
+ "general_freq": 0.06294,
436
+ "coding_ratio": 0.5276,
437
+ "total_activations_coding": 101328,
438
+ "total_activations_general": 88268
439
+ },
440
+ "61": {
441
+ "coding_freq": 0.060813,
442
+ "general_freq": 0.089756,
443
+ "coding_ratio": 0.4039,
444
+ "total_activations_coding": 87664,
445
+ "total_activations_general": 125875
446
+ },
447
+ "62": {
448
+ "coding_freq": 0.055119,
449
+ "general_freq": 0.05481,
450
+ "coding_ratio": 0.5014,
451
+ "total_activations_coding": 79456,
452
+ "total_activations_general": 76866
453
+ },
454
+ "63": {
455
+ "coding_freq": 0.049098,
456
+ "general_freq": 0.047947,
457
+ "coding_ratio": 0.5059,
458
+ "total_activations_coding": 70776,
459
+ "total_activations_general": 67242
460
+ },
461
+ "64": {
462
+ "coding_freq": 0.078688,
463
+ "general_freq": 0.082368,
464
+ "coding_ratio": 0.4886,
465
+ "total_activations_coding": 113432,
466
+ "total_activations_general": 115514
467
+ },
468
+ "65": {
469
+ "coding_freq": 0.076602,
470
+ "general_freq": 0.07032,
471
+ "coding_ratio": 0.5214,
472
+ "total_activations_coding": 110424,
473
+ "total_activations_general": 98618
474
+ },
475
+ "66": {
476
+ "coding_freq": 0.079482,
477
+ "general_freq": 0.040393,
478
+ "coding_ratio": 0.663,
479
+ "total_activations_coding": 114576,
480
+ "total_activations_general": 56648
481
+ },
482
+ "67": {
483
+ "coding_freq": 0.074815,
484
+ "general_freq": 0.043763,
485
+ "coding_ratio": 0.6309,
486
+ "total_activations_coding": 107848,
487
+ "total_activations_general": 61374
488
+ },
489
+ "68": {
490
+ "coding_freq": 0.069648,
491
+ "general_freq": 0.049204,
492
+ "coding_ratio": 0.586,
493
+ "total_activations_coding": 100400,
494
+ "total_activations_general": 69004
495
+ },
496
+ "69": {
497
+ "coding_freq": 0.04549,
498
+ "general_freq": 0.05191,
499
+ "coding_ratio": 0.467,
500
+ "total_activations_coding": 65576,
501
+ "total_activations_general": 72800
502
+ },
503
+ "70": {
504
+ "coding_freq": 0.070491,
505
+ "general_freq": 0.047402,
506
+ "coding_ratio": 0.5979,
507
+ "total_activations_coding": 101616,
508
+ "total_activations_general": 66477
509
+ },
510
+ "71": {
511
+ "coding_freq": 0.117208,
512
+ "general_freq": 0.05398,
513
+ "coding_ratio": 0.6847,
514
+ "total_activations_coding": 168960,
515
+ "total_activations_general": 75702
516
+ },
517
+ "72": {
518
+ "coding_freq": 0.044774,
519
+ "general_freq": 0.036503,
520
+ "coding_ratio": 0.5509,
521
+ "total_activations_coding": 64544,
522
+ "total_activations_general": 51192
523
+ },
524
+ "73": {
525
+ "coding_freq": 0.058893,
526
+ "general_freq": 0.08037,
527
+ "coding_ratio": 0.4229,
528
+ "total_activations_coding": 84896,
529
+ "total_activations_general": 112712
530
+ },
531
+ "74": {
532
+ "coding_freq": 0.045357,
533
+ "general_freq": 0.065604,
534
+ "coding_ratio": 0.4088,
535
+ "total_activations_coding": 65384,
536
+ "total_activations_general": 92004
537
+ },
538
+ "75": {
539
+ "coding_freq": 0.067406,
540
+ "general_freq": 0.072099,
541
+ "coding_ratio": 0.4832,
542
+ "total_activations_coding": 97168,
543
+ "total_activations_general": 101113
544
+ },
545
+ "76": {
546
+ "coding_freq": 0.076524,
547
+ "general_freq": 0.066928,
548
+ "coding_ratio": 0.5334,
549
+ "total_activations_coding": 110312,
550
+ "total_activations_general": 93861
551
+ },
552
+ "77": {
553
+ "coding_freq": 0.042211,
554
+ "general_freq": 0.061592,
555
+ "coding_ratio": 0.4066,
556
+ "total_activations_coding": 60848,
557
+ "total_activations_general": 86377
558
+ },
559
+ "78": {
560
+ "coding_freq": 0.060613,
561
+ "general_freq": 0.048905,
562
+ "coding_ratio": 0.5535,
563
+ "total_activations_coding": 87376,
564
+ "total_activations_general": 68585
565
+ },
566
+ "79": {
567
+ "coding_freq": 0.056728,
568
+ "general_freq": 0.076601,
569
+ "coding_ratio": 0.4255,
570
+ "total_activations_coding": 81776,
571
+ "total_activations_general": 107426
572
+ },
573
+ "80": {
574
+ "coding_freq": 0.078167,
575
+ "general_freq": 0.08051,
576
+ "coding_ratio": 0.4926,
577
+ "total_activations_coding": 112680,
578
+ "total_activations_general": 112908
579
+ },
580
+ "81": {
581
+ "coding_freq": 0.031128,
582
+ "general_freq": 0.044246,
583
+ "coding_ratio": 0.413,
584
+ "total_activations_coding": 44872,
585
+ "total_activations_general": 62051
586
+ },
587
+ "82": {
588
+ "coding_freq": 0.070996,
589
+ "general_freq": 0.069663,
590
+ "coding_ratio": 0.5047,
591
+ "total_activations_coding": 102344,
592
+ "total_activations_general": 97697
593
+ },
594
+ "83": {
595
+ "coding_freq": 0.050801,
596
+ "general_freq": 0.079166,
597
+ "coding_ratio": 0.3909,
598
+ "total_activations_coding": 73232,
599
+ "total_activations_general": 111024
600
+ },
601
+ "84": {
602
+ "coding_freq": 0.090886,
603
+ "general_freq": 0.051819,
604
+ "coding_ratio": 0.6369,
605
+ "total_activations_coding": 131016,
606
+ "total_activations_general": 72672
607
+ },
608
+ "85": {
609
+ "coding_freq": 0.053138,
610
+ "general_freq": 0.080619,
611
+ "coding_ratio": 0.3973,
612
+ "total_activations_coding": 76600,
613
+ "total_activations_general": 113062
614
+ },
615
+ "86": {
616
+ "coding_freq": 0.044836,
617
+ "general_freq": 0.071893,
618
+ "coding_ratio": 0.3841,
619
+ "total_activations_coding": 64632,
620
+ "total_activations_general": 100824
621
+ },
622
+ "87": {
623
+ "coding_freq": 0.056401,
624
+ "general_freq": 0.066954,
625
+ "coding_ratio": 0.4572,
626
+ "total_activations_coding": 81304,
627
+ "total_activations_general": 93897
628
+ },
629
+ "88": {
630
+ "coding_freq": 0.056984,
631
+ "general_freq": 0.042705,
632
+ "coding_ratio": 0.5716,
633
+ "total_activations_coding": 82144,
634
+ "total_activations_general": 59890
635
+ },
636
+ "89": {
637
+ "coding_freq": 0.04887,
638
+ "general_freq": 0.077348,
639
+ "coding_ratio": 0.3872,
640
+ "total_activations_coding": 70448,
641
+ "total_activations_general": 108474
642
+ },
643
+ "90": {
644
+ "coding_freq": 0.06962,
645
+ "general_freq": 0.053903,
646
+ "coding_ratio": 0.5636,
647
+ "total_activations_coding": 100360,
648
+ "total_activations_general": 75595
649
+ },
650
+ "91": {
651
+ "coding_freq": 0.057683,
652
+ "general_freq": 0.048082,
653
+ "coding_ratio": 0.5454,
654
+ "total_activations_coding": 83152,
655
+ "total_activations_general": 67431
656
+ },
657
+ "92": {
658
+ "coding_freq": 0.043115,
659
+ "general_freq": 0.043949,
660
+ "coding_ratio": 0.4952,
661
+ "total_activations_coding": 62152,
662
+ "total_activations_general": 61635
663
+ },
664
+ "93": {
665
+ "coding_freq": 0.042305,
666
+ "general_freq": 0.043496,
667
+ "coding_ratio": 0.4931,
668
+ "total_activations_coding": 60984,
669
+ "total_activations_general": 60999
670
+ },
671
+ "94": {
672
+ "coding_freq": 0.050923,
673
+ "general_freq": 0.055471,
674
+ "coding_ratio": 0.4786,
675
+ "total_activations_coding": 73408,
676
+ "total_activations_general": 77793
677
+ },
678
+ "95": {
679
+ "coding_freq": 0.057572,
680
+ "general_freq": 0.077283,
681
+ "coding_ratio": 0.4269,
682
+ "total_activations_coding": 82992,
683
+ "total_activations_general": 108383
684
+ },
685
+ "96": {
686
+ "coding_freq": 0.060735,
687
+ "general_freq": 0.083381,
688
+ "coding_ratio": 0.4214,
689
+ "total_activations_coding": 87552,
690
+ "total_activations_general": 116935
691
+ },
692
+ "97": {
693
+ "coding_freq": 0.09092,
694
+ "general_freq": 0.083681,
695
+ "coding_ratio": 0.5207,
696
+ "total_activations_coding": 131064,
697
+ "total_activations_general": 117355
698
+ },
699
+ "98": {
700
+ "coding_freq": 0.074709,
701
+ "general_freq": 0.07769,
702
+ "coding_ratio": 0.4902,
703
+ "total_activations_coding": 107696,
704
+ "total_activations_general": 108954
705
+ },
706
+ "99": {
707
+ "coding_freq": 0.036988,
708
+ "general_freq": 0.054347,
709
+ "coding_ratio": 0.405,
710
+ "total_activations_coding": 53320,
711
+ "total_activations_general": 76217
712
+ },
713
+ "100": {
714
+ "coding_freq": 0.048526,
715
+ "general_freq": 0.07014,
716
+ "coding_ratio": 0.4089,
717
+ "total_activations_coding": 69952,
718
+ "total_activations_general": 98366
719
+ },
720
+ "101": {
721
+ "coding_freq": 0.057461,
722
+ "general_freq": 0.064052,
723
+ "coding_ratio": 0.4729,
724
+ "total_activations_coding": 82832,
725
+ "total_activations_general": 89827
726
+ },
727
+ "102": {
728
+ "coding_freq": 0.05609,
729
+ "general_freq": 0.069795,
730
+ "coding_ratio": 0.4456,
731
+ "total_activations_coding": 80856,
732
+ "total_activations_general": 97881
733
+ },
734
+ "103": {
735
+ "coding_freq": 0.096558,
736
+ "general_freq": 0.106774,
737
+ "coding_ratio": 0.4749,
738
+ "total_activations_coding": 139192,
739
+ "total_activations_general": 149741
740
+ },
741
+ "104": {
742
+ "coding_freq": 0.092096,
743
+ "general_freq": 0.070999,
744
+ "coding_ratio": 0.5647,
745
+ "total_activations_coding": 132760,
746
+ "total_activations_general": 99570
747
+ },
748
+ "105": {
749
+ "coding_freq": 0.076119,
750
+ "general_freq": 0.061936,
751
+ "coding_ratio": 0.5514,
752
+ "total_activations_coding": 109728,
753
+ "total_activations_general": 86860
754
+ },
755
+ "106": {
756
+ "coding_freq": 0.032205,
757
+ "general_freq": 0.048427,
758
+ "coding_ratio": 0.3994,
759
+ "total_activations_coding": 46424,
760
+ "total_activations_general": 67915
761
+ },
762
+ "107": {
763
+ "coding_freq": 0.058299,
764
+ "general_freq": 0.06196,
765
+ "coding_ratio": 0.4848,
766
+ "total_activations_coding": 84040,
767
+ "total_activations_general": 86893
768
+ },
769
+ "108": {
770
+ "coding_freq": 0.025939,
771
+ "general_freq": 0.054532,
772
+ "coding_ratio": 0.3223,
773
+ "total_activations_coding": 37392,
774
+ "total_activations_general": 76476
775
+ },
776
+ "109": {
777
+ "coding_freq": 0.053387,
778
+ "general_freq": 0.064083,
779
+ "coding_ratio": 0.4545,
780
+ "total_activations_coding": 76960,
781
+ "total_activations_general": 89871
782
+ },
783
+ "110": {
784
+ "coding_freq": 0.062822,
785
+ "general_freq": 0.075316,
786
+ "coding_ratio": 0.4548,
787
+ "total_activations_coding": 90560,
788
+ "total_activations_general": 105625
789
+ },
790
+ "111": {
791
+ "coding_freq": 0.058582,
792
+ "general_freq": 0.046165,
793
+ "coding_ratio": 0.5593,
794
+ "total_activations_coding": 84448,
795
+ "total_activations_general": 64742
796
+ },
797
+ "112": {
798
+ "coding_freq": 0.103739,
799
+ "general_freq": 0.065349,
800
+ "coding_ratio": 0.6135,
801
+ "total_activations_coding": 149544,
802
+ "total_activations_general": 91647
803
+ },
804
+ "113": {
805
+ "coding_freq": 0.101153,
806
+ "general_freq": 0.036662,
807
+ "coding_ratio": 0.734,
808
+ "total_activations_coding": 145816,
809
+ "total_activations_general": 51416
810
+ },
811
+ "114": {
812
+ "coding_freq": 0.045002,
813
+ "general_freq": 0.069862,
814
+ "coding_ratio": 0.3918,
815
+ "total_activations_coding": 64872,
816
+ "total_activations_general": 97975
817
+ },
818
+ "115": {
819
+ "coding_freq": 0.064759,
820
+ "general_freq": 0.103199,
821
+ "coding_ratio": 0.3856,
822
+ "total_activations_coding": 93352,
823
+ "total_activations_general": 144728
824
+ },
825
+ "116": {
826
+ "coding_freq": 0.069143,
827
+ "general_freq": 0.085464,
828
+ "coding_ratio": 0.4472,
829
+ "total_activations_coding": 99672,
830
+ "total_activations_general": 119856
831
+ },
832
+ "117": {
833
+ "coding_freq": 0.102568,
834
+ "general_freq": 0.05739,
835
+ "coding_ratio": 0.6412,
836
+ "total_activations_coding": 147856,
837
+ "total_activations_general": 80484
838
+ },
839
+ "118": {
840
+ "coding_freq": 0.06669,
841
+ "general_freq": 0.036382,
842
+ "coding_ratio": 0.647,
843
+ "total_activations_coding": 96136,
844
+ "total_activations_general": 51022
845
+ },
846
+ "119": {
847
+ "coding_freq": 0.049386,
848
+ "general_freq": 0.028557,
849
+ "coding_ratio": 0.6336,
850
+ "total_activations_coding": 71192,
851
+ "total_activations_general": 40049
852
+ },
853
+ "120": {
854
+ "coding_freq": 0.072295,
855
+ "general_freq": 0.046735,
856
+ "coding_ratio": 0.6074,
857
+ "total_activations_coding": 104216,
858
+ "total_activations_general": 65542
859
+ },
860
+ "121": {
861
+ "coding_freq": 0.063954,
862
+ "general_freq": 0.064148,
863
+ "coding_ratio": 0.4992,
864
+ "total_activations_coding": 92192,
865
+ "total_activations_general": 89962
866
+ },
867
+ "122": {
868
+ "coding_freq": 0.038165,
869
+ "general_freq": 0.040243,
870
+ "coding_ratio": 0.4867,
871
+ "total_activations_coding": 55016,
872
+ "total_activations_general": 56438
873
+ },
874
+ "123": {
875
+ "coding_freq": 0.078788,
876
+ "general_freq": 0.056148,
877
+ "coding_ratio": 0.5839,
878
+ "total_activations_coding": 113576,
879
+ "total_activations_general": 78743
880
+ },
881
+ "124": {
882
+ "coding_freq": 0.064065,
883
+ "general_freq": 0.064196,
884
+ "coding_ratio": 0.4995,
885
+ "total_activations_coding": 92352,
886
+ "total_activations_general": 90029
887
+ },
888
+ "125": {
889
+ "coding_freq": 0.04205,
890
+ "general_freq": 0.038151,
891
+ "coding_ratio": 0.5243,
892
+ "total_activations_coding": 60616,
893
+ "total_activations_general": 53504
894
+ },
895
+ "126": {
896
+ "coding_freq": 0.087446,
897
+ "general_freq": 0.036849,
898
+ "coding_ratio": 0.7035,
899
+ "total_activations_coding": 126056,
900
+ "total_activations_general": 51678
901
+ },
902
+ "127": {
903
+ "coding_freq": 0.055557,
904
+ "general_freq": 0.106958,
905
+ "coding_ratio": 0.3419,
906
+ "total_activations_coding": 80088,
907
+ "total_activations_general": 150000
908
+ }
909
+ }
910
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b69ac89dde37e13b9b61bbdc3e8b0863c0264feb437be2d7a3b100ee8c9dc54
3
+ size 5364035928
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd2b6f1e654c9c4fe53e169e9f8620772e74cd6c33fcf5afc9975919e066934
3
+ size 5275955928
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f67e258450ee6ff4d2f56adb05a3e1183336616789ceae3c8cd92ce61e2bdc85
3
+ size 5364036184
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c8f82b6fa13d800024b8cc12813bf39cbe25fc68f692e686dcb757bb80fcac4
3
+ size 2083428616
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
selective_quant_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "quantization_method": "selective_moe_roundnearest",
3
+ "architecture": "fused_3d",
4
+ "num_experts": 128,
5
+ "coding_expert_ids": [
6
+ 21,
7
+ 27,
8
+ 31,
9
+ 43,
10
+ 59,
11
+ 66,
12
+ 71,
13
+ 113,
14
+ 126
15
+ ],
16
+ "coding_bits": 8,
17
+ "other_bits": 4,
18
+ "group_size": 64,
19
+ "attention_bits": null
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }