jedisct1 committed on
Commit
3e74498
·
verified ·
1 Parent(s): d998a9d

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: mlx
3
+ tags:
4
+ - compression
5
+ - expert-merging
6
+ - moe
7
+ - code
8
+ - mlx
9
+ license: apache-2.0
10
+ base_model: SamsungSAILMontreal/Qwen3-Coder-Next-REAP
11
+ pipeline_tag: text-generation
12
+ ---
13
+
14
+ # jedisct1/Qwen3-Coder-Next-REAP-q5-mlx
15
+
16
+ This model [jedisct1/Qwen3-Coder-Next-REAP-q5-mlx](https://huggingface.co/jedisct1/Qwen3-Coder-Next-REAP-q5-mlx) was
17
+ converted to MLX format from [SamsungSAILMontreal/Qwen3-Coder-Next-REAP](https://huggingface.co/SamsungSAILMontreal/Qwen3-Coder-Next-REAP)
18
+ using mlx-lm version **0.30.7**.
19
+
20
+ ## Use with mlx
21
+
22
+ ```bash
23
+ pip install mlx-lm
24
+ ```
25
+
26
+ ```python
27
+ from mlx_lm import load, generate
28
+
29
+ model, tokenizer = load("jedisct1/Qwen3-Coder-Next-REAP-q5-mlx")
30
+
31
+ prompt = "hello"
32
+
33
+ if tokenizer.chat_template is not None:
34
+ messages = [{"role": "user", "content": prompt}]
35
+ prompt = tokenizer.apply_chat_template(
36
+ messages, add_generation_prompt=True, return_dict=False,
37
+ )
38
+
39
+ response = generate(model, tokenizer, prompt=prompt, verbose=True)
40
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% macro render_extra_keys(json_dict, handled_keys) %}
2
+ {%- if json_dict is mapping %}
3
+ {%- for json_key in json_dict if json_key not in handled_keys %}
4
+ {%- if json_dict[json_key] is string %}
5
+ {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
6
+ {%- else %}
7
+ {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
8
+ {%- endif %}
9
+ {%- endfor %}
10
+ {%- endif %}
11
+ {%- endmacro %}
12
+
13
+ {%- if messages[0]["role"] == "system" %}
14
+ {%- set system_message = messages[0]["content"] %}
15
+ {%- set loop_messages = messages[1:] %}
16
+ {%- else %}
17
+ {%- set loop_messages = messages %}
18
+ {%- endif %}
19
+
20
+ {%- if not tools is defined %}
21
+ {%- set tools = [] %}
22
+ {%- endif %}
23
+
24
+ {%- if system_message is defined %}
25
+ {{- "<|im_start|>system\n" + system_message }}
26
+ {%- else %}
27
+ {%- if tools is iterable and tools | length > 0 %}
28
+ {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
29
+ {%- endif %}
30
+ {%- endif %}
31
+ {%- if tools is iterable and tools | length > 0 %}
32
+ {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
33
+ {{- "<tools>" }}
34
+ {%- for tool in tools %}
35
+ {%- if tool.function is defined %}
36
+ {%- set tool = tool.function %}
37
+ {%- endif %}
38
+ {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
39
+ {%- if tool.description is defined %}
40
+ {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
41
+ {%- endif %}
42
+ {{- '\n<parameters>' }}
43
+ {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
44
+ {%- for param_name, param_fields in tool.parameters.properties|items %}
45
+ {{- '\n<parameter>' }}
46
+ {{- '\n<name>' ~ param_name ~ '</name>' }}
47
+ {%- if param_fields.type is defined %}
48
+ {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
49
+ {%- endif %}
50
+ {%- if param_fields.description is defined %}
51
+ {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
52
+ {%- endif %}
53
+ {%- set handled_keys = ['name', 'type', 'description'] %}
54
+ {{- render_extra_keys(param_fields, handled_keys) }}
55
+ {{- '\n</parameter>' }}
56
+ {%- endfor %}
57
+ {%- endif %}
58
+ {%- set handled_keys = ['type', 'properties'] %}
59
+ {{- render_extra_keys(tool.parameters, handled_keys) }}
60
+ {{- '\n</parameters>' }}
61
+ {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
62
+ {{- render_extra_keys(tool, handled_keys) }}
63
+ {{- '\n</function>' }}
64
+ {%- endfor %}
65
+ {{- "\n</tools>" }}
66
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
67
+ {%- endif %}
68
+ {%- if system_message is defined %}
69
+ {{- '<|im_end|>\n' }}
70
+ {%- else %}
71
+ {%- if tools is iterable and tools | length > 0 %}
72
+ {{- '<|im_end|>\n' }}
73
+ {%- endif %}
74
+ {%- endif %}
75
+ {%- for message in loop_messages %}
76
+ {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
77
+ {{- '<|im_start|>' + message.role }}
78
+ {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
79
+ {{- '\n' + message.content | trim + '\n' }}
80
+ {%- endif %}
81
+ {%- for tool_call in message.tool_calls %}
82
+ {%- if tool_call.function is defined %}
83
+ {%- set tool_call = tool_call.function %}
84
+ {%- endif %}
85
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
86
+ {%- if tool_call.arguments is defined %}
87
+ {%- for args_name, args_value in tool_call.arguments|items %}
88
+ {{- '<parameter=' + args_name + '>\n' }}
89
+ {%- set args_value = args_value if args_value is string else args_value | tojson | safe %}
90
+ {{- args_value }}
91
+ {{- '\n</parameter>\n' }}
92
+ {%- endfor %}
93
+ {%- endif %}
94
+ {{- '</function>\n</tool_call>' }}
95
+ {%- endfor %}
96
+ {{- '<|im_end|>\n' }}
97
+ {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
98
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
99
+ {%- elif message.role == "tool" %}
100
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
101
+ {{- '<|im_start|>user' }}
102
+ {%- endif %}
103
+ {{- '\n<tool_response>\n' }}
104
+ {{- message.content }}
105
+ {{- '\n</tool_response>' }}
106
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
107
+ {{- '<|im_end|>\n' }}
108
+ {%- elif loop.last %}
109
+ {{- '<|im_end|>\n' }}
110
+ {%- endif %}
111
+ {%- else %}
112
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
113
+ {%- endif %}
114
+ {%- endfor %}
115
+ {%- if add_generation_prompt %}
116
+ {{- '<|im_start|>assistant\n' }}
117
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,889 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3NextForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0,
7
+ "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": [
11
+ 151645,
12
+ 151643
13
+ ],
14
+ "full_attention_interval": 4,
15
+ "head_dim": 256,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 2048,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 5120,
20
+ "layer_types": [
21
+ "linear_attention",
22
+ "linear_attention",
23
+ "linear_attention",
24
+ "full_attention",
25
+ "linear_attention",
26
+ "linear_attention",
27
+ "linear_attention",
28
+ "full_attention",
29
+ "linear_attention",
30
+ "linear_attention",
31
+ "linear_attention",
32
+ "full_attention",
33
+ "linear_attention",
34
+ "linear_attention",
35
+ "linear_attention",
36
+ "full_attention",
37
+ "linear_attention",
38
+ "linear_attention",
39
+ "linear_attention",
40
+ "full_attention",
41
+ "linear_attention",
42
+ "linear_attention",
43
+ "linear_attention",
44
+ "full_attention",
45
+ "linear_attention",
46
+ "linear_attention",
47
+ "linear_attention",
48
+ "full_attention",
49
+ "linear_attention",
50
+ "linear_attention",
51
+ "linear_attention",
52
+ "full_attention",
53
+ "linear_attention",
54
+ "linear_attention",
55
+ "linear_attention",
56
+ "full_attention",
57
+ "linear_attention",
58
+ "linear_attention",
59
+ "linear_attention",
60
+ "full_attention",
61
+ "linear_attention",
62
+ "linear_attention",
63
+ "linear_attention",
64
+ "full_attention",
65
+ "linear_attention",
66
+ "linear_attention",
67
+ "linear_attention",
68
+ "full_attention"
69
+ ],
70
+ "linear_conv_kernel_dim": 4,
71
+ "linear_key_head_dim": 128,
72
+ "linear_num_key_heads": 16,
73
+ "linear_num_value_heads": 32,
74
+ "linear_value_head_dim": 128,
75
+ "max_position_embeddings": 262144,
76
+ "merge_args": {
77
+ "balance_group_size": 0,
78
+ "dataset": "c4+math+the-stack-smol",
79
+ "expert_saliency": "reap",
80
+ "gate_softmax": false,
81
+ "group": "freq_logits",
82
+ "merge": "none",
83
+ "merge_size": 384,
84
+ "merger_bs": 3072,
85
+ "merger_seq_len": 512,
86
+ "pca_dim": 64,
87
+ "precompute_input": true,
88
+ "use_gate_output": false
89
+ },
90
+ "mlp_only_layers": [],
91
+ "model_type": "qwen3_next",
92
+ "moe_intermediate_size": 512,
93
+ "norm_topk_prob": true,
94
+ "num_attention_heads": 16,
95
+ "num_experts": 384,
96
+ "num_experts_per_tok": 10,
97
+ "num_hidden_layers": 48,
98
+ "num_key_value_heads": 2,
99
+ "output_router_logits": false,
100
+ "partial_rotary_factor": 0.25,
101
+ "quantization": {
102
+ "group_size": 32,
103
+ "bits": 5,
104
+ "mode": "affine",
105
+ "model.layers.0.mlp.gate": {
106
+ "group_size": 64,
107
+ "bits": 8
108
+ },
109
+ "model.layers.0.mlp.shared_expert_gate": {
110
+ "group_size": 64,
111
+ "bits": 8
112
+ },
113
+ "model.layers.1.mlp.gate": {
114
+ "group_size": 64,
115
+ "bits": 8
116
+ },
117
+ "model.layers.1.mlp.shared_expert_gate": {
118
+ "group_size": 64,
119
+ "bits": 8
120
+ },
121
+ "model.layers.2.mlp.gate": {
122
+ "group_size": 64,
123
+ "bits": 8
124
+ },
125
+ "model.layers.2.mlp.shared_expert_gate": {
126
+ "group_size": 64,
127
+ "bits": 8
128
+ },
129
+ "model.layers.3.mlp.gate": {
130
+ "group_size": 64,
131
+ "bits": 8
132
+ },
133
+ "model.layers.3.mlp.shared_expert_gate": {
134
+ "group_size": 64,
135
+ "bits": 8
136
+ },
137
+ "model.layers.4.mlp.gate": {
138
+ "group_size": 64,
139
+ "bits": 8
140
+ },
141
+ "model.layers.4.mlp.shared_expert_gate": {
142
+ "group_size": 64,
143
+ "bits": 8
144
+ },
145
+ "model.layers.5.mlp.gate": {
146
+ "group_size": 64,
147
+ "bits": 8
148
+ },
149
+ "model.layers.5.mlp.shared_expert_gate": {
150
+ "group_size": 64,
151
+ "bits": 8
152
+ },
153
+ "model.layers.6.mlp.gate": {
154
+ "group_size": 64,
155
+ "bits": 8
156
+ },
157
+ "model.layers.6.mlp.shared_expert_gate": {
158
+ "group_size": 64,
159
+ "bits": 8
160
+ },
161
+ "model.layers.7.mlp.gate": {
162
+ "group_size": 64,
163
+ "bits": 8
164
+ },
165
+ "model.layers.7.mlp.shared_expert_gate": {
166
+ "group_size": 64,
167
+ "bits": 8
168
+ },
169
+ "model.layers.8.mlp.gate": {
170
+ "group_size": 64,
171
+ "bits": 8
172
+ },
173
+ "model.layers.8.mlp.shared_expert_gate": {
174
+ "group_size": 64,
175
+ "bits": 8
176
+ },
177
+ "model.layers.9.mlp.gate": {
178
+ "group_size": 64,
179
+ "bits": 8
180
+ },
181
+ "model.layers.9.mlp.shared_expert_gate": {
182
+ "group_size": 64,
183
+ "bits": 8
184
+ },
185
+ "model.layers.10.mlp.gate": {
186
+ "group_size": 64,
187
+ "bits": 8
188
+ },
189
+ "model.layers.10.mlp.shared_expert_gate": {
190
+ "group_size": 64,
191
+ "bits": 8
192
+ },
193
+ "model.layers.11.mlp.gate": {
194
+ "group_size": 64,
195
+ "bits": 8
196
+ },
197
+ "model.layers.11.mlp.shared_expert_gate": {
198
+ "group_size": 64,
199
+ "bits": 8
200
+ },
201
+ "model.layers.12.mlp.gate": {
202
+ "group_size": 64,
203
+ "bits": 8
204
+ },
205
+ "model.layers.12.mlp.shared_expert_gate": {
206
+ "group_size": 64,
207
+ "bits": 8
208
+ },
209
+ "model.layers.13.mlp.gate": {
210
+ "group_size": 64,
211
+ "bits": 8
212
+ },
213
+ "model.layers.13.mlp.shared_expert_gate": {
214
+ "group_size": 64,
215
+ "bits": 8
216
+ },
217
+ "model.layers.14.mlp.gate": {
218
+ "group_size": 64,
219
+ "bits": 8
220
+ },
221
+ "model.layers.14.mlp.shared_expert_gate": {
222
+ "group_size": 64,
223
+ "bits": 8
224
+ },
225
+ "model.layers.15.mlp.gate": {
226
+ "group_size": 64,
227
+ "bits": 8
228
+ },
229
+ "model.layers.15.mlp.shared_expert_gate": {
230
+ "group_size": 64,
231
+ "bits": 8
232
+ },
233
+ "model.layers.16.mlp.gate": {
234
+ "group_size": 64,
235
+ "bits": 8
236
+ },
237
+ "model.layers.16.mlp.shared_expert_gate": {
238
+ "group_size": 64,
239
+ "bits": 8
240
+ },
241
+ "model.layers.17.mlp.gate": {
242
+ "group_size": 64,
243
+ "bits": 8
244
+ },
245
+ "model.layers.17.mlp.shared_expert_gate": {
246
+ "group_size": 64,
247
+ "bits": 8
248
+ },
249
+ "model.layers.18.mlp.gate": {
250
+ "group_size": 64,
251
+ "bits": 8
252
+ },
253
+ "model.layers.18.mlp.shared_expert_gate": {
254
+ "group_size": 64,
255
+ "bits": 8
256
+ },
257
+ "model.layers.19.mlp.gate": {
258
+ "group_size": 64,
259
+ "bits": 8
260
+ },
261
+ "model.layers.19.mlp.shared_expert_gate": {
262
+ "group_size": 64,
263
+ "bits": 8
264
+ },
265
+ "model.layers.20.mlp.gate": {
266
+ "group_size": 64,
267
+ "bits": 8
268
+ },
269
+ "model.layers.20.mlp.shared_expert_gate": {
270
+ "group_size": 64,
271
+ "bits": 8
272
+ },
273
+ "model.layers.21.mlp.gate": {
274
+ "group_size": 64,
275
+ "bits": 8
276
+ },
277
+ "model.layers.21.mlp.shared_expert_gate": {
278
+ "group_size": 64,
279
+ "bits": 8
280
+ },
281
+ "model.layers.22.mlp.gate": {
282
+ "group_size": 64,
283
+ "bits": 8
284
+ },
285
+ "model.layers.22.mlp.shared_expert_gate": {
286
+ "group_size": 64,
287
+ "bits": 8
288
+ },
289
+ "model.layers.23.mlp.gate": {
290
+ "group_size": 64,
291
+ "bits": 8
292
+ },
293
+ "model.layers.23.mlp.shared_expert_gate": {
294
+ "group_size": 64,
295
+ "bits": 8
296
+ },
297
+ "model.layers.24.mlp.gate": {
298
+ "group_size": 64,
299
+ "bits": 8
300
+ },
301
+ "model.layers.24.mlp.shared_expert_gate": {
302
+ "group_size": 64,
303
+ "bits": 8
304
+ },
305
+ "model.layers.25.mlp.gate": {
306
+ "group_size": 64,
307
+ "bits": 8
308
+ },
309
+ "model.layers.25.mlp.shared_expert_gate": {
310
+ "group_size": 64,
311
+ "bits": 8
312
+ },
313
+ "model.layers.26.mlp.gate": {
314
+ "group_size": 64,
315
+ "bits": 8
316
+ },
317
+ "model.layers.26.mlp.shared_expert_gate": {
318
+ "group_size": 64,
319
+ "bits": 8
320
+ },
321
+ "model.layers.27.mlp.gate": {
322
+ "group_size": 64,
323
+ "bits": 8
324
+ },
325
+ "model.layers.27.mlp.shared_expert_gate": {
326
+ "group_size": 64,
327
+ "bits": 8
328
+ },
329
+ "model.layers.28.mlp.gate": {
330
+ "group_size": 64,
331
+ "bits": 8
332
+ },
333
+ "model.layers.28.mlp.shared_expert_gate": {
334
+ "group_size": 64,
335
+ "bits": 8
336
+ },
337
+ "model.layers.29.mlp.gate": {
338
+ "group_size": 64,
339
+ "bits": 8
340
+ },
341
+ "model.layers.29.mlp.shared_expert_gate": {
342
+ "group_size": 64,
343
+ "bits": 8
344
+ },
345
+ "model.layers.30.mlp.gate": {
346
+ "group_size": 64,
347
+ "bits": 8
348
+ },
349
+ "model.layers.30.mlp.shared_expert_gate": {
350
+ "group_size": 64,
351
+ "bits": 8
352
+ },
353
+ "model.layers.31.mlp.gate": {
354
+ "group_size": 64,
355
+ "bits": 8
356
+ },
357
+ "model.layers.31.mlp.shared_expert_gate": {
358
+ "group_size": 64,
359
+ "bits": 8
360
+ },
361
+ "model.layers.32.mlp.gate": {
362
+ "group_size": 64,
363
+ "bits": 8
364
+ },
365
+ "model.layers.32.mlp.shared_expert_gate": {
366
+ "group_size": 64,
367
+ "bits": 8
368
+ },
369
+ "model.layers.33.mlp.gate": {
370
+ "group_size": 64,
371
+ "bits": 8
372
+ },
373
+ "model.layers.33.mlp.shared_expert_gate": {
374
+ "group_size": 64,
375
+ "bits": 8
376
+ },
377
+ "model.layers.34.mlp.gate": {
378
+ "group_size": 64,
379
+ "bits": 8
380
+ },
381
+ "model.layers.34.mlp.shared_expert_gate": {
382
+ "group_size": 64,
383
+ "bits": 8
384
+ },
385
+ "model.layers.35.mlp.gate": {
386
+ "group_size": 64,
387
+ "bits": 8
388
+ },
389
+ "model.layers.35.mlp.shared_expert_gate": {
390
+ "group_size": 64,
391
+ "bits": 8
392
+ },
393
+ "model.layers.36.mlp.gate": {
394
+ "group_size": 64,
395
+ "bits": 8
396
+ },
397
+ "model.layers.36.mlp.shared_expert_gate": {
398
+ "group_size": 64,
399
+ "bits": 8
400
+ },
401
+ "model.layers.37.mlp.gate": {
402
+ "group_size": 64,
403
+ "bits": 8
404
+ },
405
+ "model.layers.37.mlp.shared_expert_gate": {
406
+ "group_size": 64,
407
+ "bits": 8
408
+ },
409
+ "model.layers.38.mlp.gate": {
410
+ "group_size": 64,
411
+ "bits": 8
412
+ },
413
+ "model.layers.38.mlp.shared_expert_gate": {
414
+ "group_size": 64,
415
+ "bits": 8
416
+ },
417
+ "model.layers.39.mlp.gate": {
418
+ "group_size": 64,
419
+ "bits": 8
420
+ },
421
+ "model.layers.39.mlp.shared_expert_gate": {
422
+ "group_size": 64,
423
+ "bits": 8
424
+ },
425
+ "model.layers.40.mlp.gate": {
426
+ "group_size": 64,
427
+ "bits": 8
428
+ },
429
+ "model.layers.40.mlp.shared_expert_gate": {
430
+ "group_size": 64,
431
+ "bits": 8
432
+ },
433
+ "model.layers.41.mlp.gate": {
434
+ "group_size": 64,
435
+ "bits": 8
436
+ },
437
+ "model.layers.41.mlp.shared_expert_gate": {
438
+ "group_size": 64,
439
+ "bits": 8
440
+ },
441
+ "model.layers.42.mlp.gate": {
442
+ "group_size": 64,
443
+ "bits": 8
444
+ },
445
+ "model.layers.42.mlp.shared_expert_gate": {
446
+ "group_size": 64,
447
+ "bits": 8
448
+ },
449
+ "model.layers.43.mlp.gate": {
450
+ "group_size": 64,
451
+ "bits": 8
452
+ },
453
+ "model.layers.43.mlp.shared_expert_gate": {
454
+ "group_size": 64,
455
+ "bits": 8
456
+ },
457
+ "model.layers.44.mlp.gate": {
458
+ "group_size": 64,
459
+ "bits": 8
460
+ },
461
+ "model.layers.44.mlp.shared_expert_gate": {
462
+ "group_size": 64,
463
+ "bits": 8
464
+ },
465
+ "model.layers.45.mlp.gate": {
466
+ "group_size": 64,
467
+ "bits": 8
468
+ },
469
+ "model.layers.45.mlp.shared_expert_gate": {
470
+ "group_size": 64,
471
+ "bits": 8
472
+ },
473
+ "model.layers.46.mlp.gate": {
474
+ "group_size": 64,
475
+ "bits": 8
476
+ },
477
+ "model.layers.46.mlp.shared_expert_gate": {
478
+ "group_size": 64,
479
+ "bits": 8
480
+ },
481
+ "model.layers.47.mlp.gate": {
482
+ "group_size": 64,
483
+ "bits": 8
484
+ },
485
+ "model.layers.47.mlp.shared_expert_gate": {
486
+ "group_size": 64,
487
+ "bits": 8
488
+ }
489
+ },
490
+ "quantization_config": {
491
+ "group_size": 32,
492
+ "bits": 5,
493
+ "mode": "affine",
494
+ "model.layers.0.mlp.gate": {
495
+ "group_size": 64,
496
+ "bits": 8
497
+ },
498
+ "model.layers.0.mlp.shared_expert_gate": {
499
+ "group_size": 64,
500
+ "bits": 8
501
+ },
502
+ "model.layers.1.mlp.gate": {
503
+ "group_size": 64,
504
+ "bits": 8
505
+ },
506
+ "model.layers.1.mlp.shared_expert_gate": {
507
+ "group_size": 64,
508
+ "bits": 8
509
+ },
510
+ "model.layers.2.mlp.gate": {
511
+ "group_size": 64,
512
+ "bits": 8
513
+ },
514
+ "model.layers.2.mlp.shared_expert_gate": {
515
+ "group_size": 64,
516
+ "bits": 8
517
+ },
518
+ "model.layers.3.mlp.gate": {
519
+ "group_size": 64,
520
+ "bits": 8
521
+ },
522
+ "model.layers.3.mlp.shared_expert_gate": {
523
+ "group_size": 64,
524
+ "bits": 8
525
+ },
526
+ "model.layers.4.mlp.gate": {
527
+ "group_size": 64,
528
+ "bits": 8
529
+ },
530
+ "model.layers.4.mlp.shared_expert_gate": {
531
+ "group_size": 64,
532
+ "bits": 8
533
+ },
534
+ "model.layers.5.mlp.gate": {
535
+ "group_size": 64,
536
+ "bits": 8
537
+ },
538
+ "model.layers.5.mlp.shared_expert_gate": {
539
+ "group_size": 64,
540
+ "bits": 8
541
+ },
542
+ "model.layers.6.mlp.gate": {
543
+ "group_size": 64,
544
+ "bits": 8
545
+ },
546
+ "model.layers.6.mlp.shared_expert_gate": {
547
+ "group_size": 64,
548
+ "bits": 8
549
+ },
550
+ "model.layers.7.mlp.gate": {
551
+ "group_size": 64,
552
+ "bits": 8
553
+ },
554
+ "model.layers.7.mlp.shared_expert_gate": {
555
+ "group_size": 64,
556
+ "bits": 8
557
+ },
558
+ "model.layers.8.mlp.gate": {
559
+ "group_size": 64,
560
+ "bits": 8
561
+ },
562
+ "model.layers.8.mlp.shared_expert_gate": {
563
+ "group_size": 64,
564
+ "bits": 8
565
+ },
566
+ "model.layers.9.mlp.gate": {
567
+ "group_size": 64,
568
+ "bits": 8
569
+ },
570
+ "model.layers.9.mlp.shared_expert_gate": {
571
+ "group_size": 64,
572
+ "bits": 8
573
+ },
574
+ "model.layers.10.mlp.gate": {
575
+ "group_size": 64,
576
+ "bits": 8
577
+ },
578
+ "model.layers.10.mlp.shared_expert_gate": {
579
+ "group_size": 64,
580
+ "bits": 8
581
+ },
582
+ "model.layers.11.mlp.gate": {
583
+ "group_size": 64,
584
+ "bits": 8
585
+ },
586
+ "model.layers.11.mlp.shared_expert_gate": {
587
+ "group_size": 64,
588
+ "bits": 8
589
+ },
590
+ "model.layers.12.mlp.gate": {
591
+ "group_size": 64,
592
+ "bits": 8
593
+ },
594
+ "model.layers.12.mlp.shared_expert_gate": {
595
+ "group_size": 64,
596
+ "bits": 8
597
+ },
598
+ "model.layers.13.mlp.gate": {
599
+ "group_size": 64,
600
+ "bits": 8
601
+ },
602
+ "model.layers.13.mlp.shared_expert_gate": {
603
+ "group_size": 64,
604
+ "bits": 8
605
+ },
606
+ "model.layers.14.mlp.gate": {
607
+ "group_size": 64,
608
+ "bits": 8
609
+ },
610
+ "model.layers.14.mlp.shared_expert_gate": {
611
+ "group_size": 64,
612
+ "bits": 8
613
+ },
614
+ "model.layers.15.mlp.gate": {
615
+ "group_size": 64,
616
+ "bits": 8
617
+ },
618
+ "model.layers.15.mlp.shared_expert_gate": {
619
+ "group_size": 64,
620
+ "bits": 8
621
+ },
622
+ "model.layers.16.mlp.gate": {
623
+ "group_size": 64,
624
+ "bits": 8
625
+ },
626
+ "model.layers.16.mlp.shared_expert_gate": {
627
+ "group_size": 64,
628
+ "bits": 8
629
+ },
630
+ "model.layers.17.mlp.gate": {
631
+ "group_size": 64,
632
+ "bits": 8
633
+ },
634
+ "model.layers.17.mlp.shared_expert_gate": {
635
+ "group_size": 64,
636
+ "bits": 8
637
+ },
638
+ "model.layers.18.mlp.gate": {
639
+ "group_size": 64,
640
+ "bits": 8
641
+ },
642
+ "model.layers.18.mlp.shared_expert_gate": {
643
+ "group_size": 64,
644
+ "bits": 8
645
+ },
646
+ "model.layers.19.mlp.gate": {
647
+ "group_size": 64,
648
+ "bits": 8
649
+ },
650
+ "model.layers.19.mlp.shared_expert_gate": {
651
+ "group_size": 64,
652
+ "bits": 8
653
+ },
654
+ "model.layers.20.mlp.gate": {
655
+ "group_size": 64,
656
+ "bits": 8
657
+ },
658
+ "model.layers.20.mlp.shared_expert_gate": {
659
+ "group_size": 64,
660
+ "bits": 8
661
+ },
662
+ "model.layers.21.mlp.gate": {
663
+ "group_size": 64,
664
+ "bits": 8
665
+ },
666
+ "model.layers.21.mlp.shared_expert_gate": {
667
+ "group_size": 64,
668
+ "bits": 8
669
+ },
670
+ "model.layers.22.mlp.gate": {
671
+ "group_size": 64,
672
+ "bits": 8
673
+ },
674
+ "model.layers.22.mlp.shared_expert_gate": {
675
+ "group_size": 64,
676
+ "bits": 8
677
+ },
678
+ "model.layers.23.mlp.gate": {
679
+ "group_size": 64,
680
+ "bits": 8
681
+ },
682
+ "model.layers.23.mlp.shared_expert_gate": {
683
+ "group_size": 64,
684
+ "bits": 8
685
+ },
686
+ "model.layers.24.mlp.gate": {
687
+ "group_size": 64,
688
+ "bits": 8
689
+ },
690
+ "model.layers.24.mlp.shared_expert_gate": {
691
+ "group_size": 64,
692
+ "bits": 8
693
+ },
694
+ "model.layers.25.mlp.gate": {
695
+ "group_size": 64,
696
+ "bits": 8
697
+ },
698
+ "model.layers.25.mlp.shared_expert_gate": {
699
+ "group_size": 64,
700
+ "bits": 8
701
+ },
702
+ "model.layers.26.mlp.gate": {
703
+ "group_size": 64,
704
+ "bits": 8
705
+ },
706
+ "model.layers.26.mlp.shared_expert_gate": {
707
+ "group_size": 64,
708
+ "bits": 8
709
+ },
710
+ "model.layers.27.mlp.gate": {
711
+ "group_size": 64,
712
+ "bits": 8
713
+ },
714
+ "model.layers.27.mlp.shared_expert_gate": {
715
+ "group_size": 64,
716
+ "bits": 8
717
+ },
718
+ "model.layers.28.mlp.gate": {
719
+ "group_size": 64,
720
+ "bits": 8
721
+ },
722
+ "model.layers.28.mlp.shared_expert_gate": {
723
+ "group_size": 64,
724
+ "bits": 8
725
+ },
726
+ "model.layers.29.mlp.gate": {
727
+ "group_size": 64,
728
+ "bits": 8
729
+ },
730
+ "model.layers.29.mlp.shared_expert_gate": {
731
+ "group_size": 64,
732
+ "bits": 8
733
+ },
734
+ "model.layers.30.mlp.gate": {
735
+ "group_size": 64,
736
+ "bits": 8
737
+ },
738
+ "model.layers.30.mlp.shared_expert_gate": {
739
+ "group_size": 64,
740
+ "bits": 8
741
+ },
742
+ "model.layers.31.mlp.gate": {
743
+ "group_size": 64,
744
+ "bits": 8
745
+ },
746
+ "model.layers.31.mlp.shared_expert_gate": {
747
+ "group_size": 64,
748
+ "bits": 8
749
+ },
750
+ "model.layers.32.mlp.gate": {
751
+ "group_size": 64,
752
+ "bits": 8
753
+ },
754
+ "model.layers.32.mlp.shared_expert_gate": {
755
+ "group_size": 64,
756
+ "bits": 8
757
+ },
758
+ "model.layers.33.mlp.gate": {
759
+ "group_size": 64,
760
+ "bits": 8
761
+ },
762
+ "model.layers.33.mlp.shared_expert_gate": {
763
+ "group_size": 64,
764
+ "bits": 8
765
+ },
766
+ "model.layers.34.mlp.gate": {
767
+ "group_size": 64,
768
+ "bits": 8
769
+ },
770
+ "model.layers.34.mlp.shared_expert_gate": {
771
+ "group_size": 64,
772
+ "bits": 8
773
+ },
774
+ "model.layers.35.mlp.gate": {
775
+ "group_size": 64,
776
+ "bits": 8
777
+ },
778
+ "model.layers.35.mlp.shared_expert_gate": {
779
+ "group_size": 64,
780
+ "bits": 8
781
+ },
782
+ "model.layers.36.mlp.gate": {
783
+ "group_size": 64,
784
+ "bits": 8
785
+ },
786
+ "model.layers.36.mlp.shared_expert_gate": {
787
+ "group_size": 64,
788
+ "bits": 8
789
+ },
790
+ "model.layers.37.mlp.gate": {
791
+ "group_size": 64,
792
+ "bits": 8
793
+ },
794
+ "model.layers.37.mlp.shared_expert_gate": {
795
+ "group_size": 64,
796
+ "bits": 8
797
+ },
798
+ "model.layers.38.mlp.gate": {
799
+ "group_size": 64,
800
+ "bits": 8
801
+ },
802
+ "model.layers.38.mlp.shared_expert_gate": {
803
+ "group_size": 64,
804
+ "bits": 8
805
+ },
806
+ "model.layers.39.mlp.gate": {
807
+ "group_size": 64,
808
+ "bits": 8
809
+ },
810
+ "model.layers.39.mlp.shared_expert_gate": {
811
+ "group_size": 64,
812
+ "bits": 8
813
+ },
814
+ "model.layers.40.mlp.gate": {
815
+ "group_size": 64,
816
+ "bits": 8
817
+ },
818
+ "model.layers.40.mlp.shared_expert_gate": {
819
+ "group_size": 64,
820
+ "bits": 8
821
+ },
822
+ "model.layers.41.mlp.gate": {
823
+ "group_size": 64,
824
+ "bits": 8
825
+ },
826
+ "model.layers.41.mlp.shared_expert_gate": {
827
+ "group_size": 64,
828
+ "bits": 8
829
+ },
830
+ "model.layers.42.mlp.gate": {
831
+ "group_size": 64,
832
+ "bits": 8
833
+ },
834
+ "model.layers.42.mlp.shared_expert_gate": {
835
+ "group_size": 64,
836
+ "bits": 8
837
+ },
838
+ "model.layers.43.mlp.gate": {
839
+ "group_size": 64,
840
+ "bits": 8
841
+ },
842
+ "model.layers.43.mlp.shared_expert_gate": {
843
+ "group_size": 64,
844
+ "bits": 8
845
+ },
846
+ "model.layers.44.mlp.gate": {
847
+ "group_size": 64,
848
+ "bits": 8
849
+ },
850
+ "model.layers.44.mlp.shared_expert_gate": {
851
+ "group_size": 64,
852
+ "bits": 8
853
+ },
854
+ "model.layers.45.mlp.gate": {
855
+ "group_size": 64,
856
+ "bits": 8
857
+ },
858
+ "model.layers.45.mlp.shared_expert_gate": {
859
+ "group_size": 64,
860
+ "bits": 8
861
+ },
862
+ "model.layers.46.mlp.gate": {
863
+ "group_size": 64,
864
+ "bits": 8
865
+ },
866
+ "model.layers.46.mlp.shared_expert_gate": {
867
+ "group_size": 64,
868
+ "bits": 8
869
+ },
870
+ "model.layers.47.mlp.gate": {
871
+ "group_size": 64,
872
+ "bits": 8
873
+ },
874
+ "model.layers.47.mlp.shared_expert_gate": {
875
+ "group_size": 64,
876
+ "bits": 8
877
+ }
878
+ },
879
+ "rms_norm_eps": 1e-06,
880
+ "rope_scaling": null,
881
+ "rope_theta": 5000000,
882
+ "router_aux_loss_coef": 0.001,
883
+ "shared_expert_intermediate_size": 512,
884
+ "tie_word_embeddings": false,
885
+ "transformers_version": "4.57.6",
886
+ "use_cache": true,
887
+ "use_sliding_window": false,
888
+ "vocab_size": 151936
889
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "top_k": 40,
10
+ "top_p": 0.95,
11
+ "transformers_version": "4.57.6"
12
+ }
model-00001-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f39acf26179d041ae0f30e42398018254a19746b34573591a20b946153174be
3
+ size 5229206566
model-00002-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd19c58331acbe4bb4a494162b7f808b60263ed781deb4a5f2c66564f11db757
3
+ size 5295304062
model-00003-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01fba25db76dd6c3923fee3fc76f68270bfbe00b45333099b51a4b0786f81002
3
+ size 5271647162
model-00004-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf0e850443e4a5b3ebbc3a43155cb074abb0acbda0f73855503cb517830b2bf
3
+ size 5300185961
model-00005-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0637b952d3fb784a39f4c1c54d3eb1d0ce89e14413d033c908729b03511df66a
3
+ size 5295304203
model-00006-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7273b5d77e0dddd20d8420f2bb1603cee5e0ea7c67a5cfdedf19f3f44426d24a
3
+ size 5271647130
model-00007-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cb888eb5a812e59a76d1fe062a82c10d0997521941bbebdb7cc92ded30a710c
3
+ size 5295304295
model-00008-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a9c08ed9b8f66ef157501e3c29debdc25a6a2965ebf7053350bd873ae2e878
3
+ size 5300185937
model-00009-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec66f25bd23c1c9e65339e50c98f9664a38da695fc46c2db7f29744524558b7
3
+ size 3005849432
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "is_local": true,
9
+ "model_max_length": 1048576,
10
+ "pad_token": "<|endoftext|>",
11
+ "split_special_tokens": false,
12
+ "tokenizer_class": "Qwen2Tokenizer",
13
+ "tool_parser_type": "qwen3_coder",
14
+ "unk_token": null
15
+ }