KrisSimon committed on
Commit
a094767
·
verified ·
1 Parent(s): 012bd30

Upload ARO Coder 4-bit (distill_student)

Browse files
.source_model CHANGED
@@ -1 +1 @@
1
- /Users/kris/Projects/ARO/ARO-Train/Train/models/dpo/fused
 
1
+ /Users/kris/Projects/ARO/ARO-Train/Train/models/distill/student/fused
README.md CHANGED
@@ -26,9 +26,9 @@ ARO is a domain-specific language where every statement follows the pattern:
26
  | **Base model** | [mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit](https://huggingface.co/mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit) |
27
  | **Quantization** | 4-bit (MLX) |
28
  | **Language** | ARO |
29
- | **Training samples** | 861 |
30
- | **Syntax pass rate** | 47% |
31
- | **Source label** | dpo |
32
 
33
  ## Links
34
 
@@ -108,7 +108,7 @@ Key features:
108
 
109
  This model was trained with the ARO training pipeline:
110
 
111
- 1. **Corpus collection** — 861 samples from Examples, Book, Wiki, Proposals, and real-world ARO applications
112
  2. **Supervised fine-tuning** — LoRA on all code generation, debugging, Q&A, and explanation tasks
113
  3. **DPO preference training** — using `aro check` validation to build chosen/rejected pairs
114
  4. **Iterative self-improvement** — multiple rounds of generate-validate-retrain
 
26
  | **Base model** | [mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit](https://huggingface.co/mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit) |
27
  | **Quantization** | 4-bit (MLX) |
28
  | **Language** | ARO |
29
+ | **Training samples** | 777 |
30
+ | **Syntax pass rate** | 30% |
31
+ | **Source label** | distill_student |
32
 
33
  ## Links
34
 
 
108
 
109
  This model was trained with the ARO training pipeline:
110
 
111
+ 1. **Corpus collection** — 777 samples from Examples, Book, Wiki, Proposals, and real-world ARO applications
112
  2. **Supervised fine-tuning** — LoRA on all code generation, debugging, Q&A, and explanation tasks
113
  3. **DPO preference training** — using `aro check` validation to build chosen/rejected pairs
114
  4. **Iterative self-improvement** — multiple rounds of generate-validate-retrain
chat_template.jinja CHANGED
@@ -1,131 +1,85 @@
1
- {% macro render_item_list(item_list, tag_name='required') %}
2
- {%- if item_list is defined and item_list is iterable and item_list | length > 0 %}
3
- {%- if tag_name %}{{- '\n<' ~ tag_name ~ '>' -}}{% endif %}
4
- {{- '[' }}
5
- {%- for item in item_list -%}
6
- {%- if loop.index > 1 %}{{- ", "}}{% endif -%}
7
- {%- if item is string -%}
8
- {{ "`" ~ item ~ "`" }}
9
- {%- else -%}
10
- {{ item }}
11
- {%- endif -%}
12
- {%- endfor -%}
13
- {{- ']' }}
14
- {%- if tag_name %}{{- '</' ~ tag_name ~ '>' -}}{% endif %}
15
  {%- endif %}
16
- {% endmacro %}
17
-
18
- {%- if messages[0]["role"] == "system" %}
19
- {%- set system_message = messages[0]["content"] %}
20
- {%- set loop_messages = messages[1:] %}
21
- {%- else %}
22
- {%- set loop_messages = messages %}
23
- {%- endif %}
24
-
25
- {%- if not tools is defined %}
26
- {%- set tools = [] %}
27
- {%- endif %}
28
-
29
- {%- if system_message is defined %}
30
- {{- "<|im_start|>system\n" + system_message }}
31
  {%- else %}
32
- {%- if tools is iterable and tools | length > 0 %}
33
- {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
34
  {%- endif %}
35
  {%- endif %}
36
- {%- if tools is iterable and tools | length > 0 %}
37
- {{- "\n\nYou have access to the following functions:\n\n" }}
38
- {{- "<tools>" }}
39
- {%- for tool in tools %}
40
- {%- if tool.function is defined %}
41
- {%- set tool = tool.function %}
42
- {%- endif %}
43
- {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
44
- {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
45
- {{- '\n<parameters>' }}
46
- {%- for param_name, param_fields in tool.parameters.properties|items %}
47
- {{- '\n<parameter>' }}
48
- {{- '\n<name>' ~ param_name ~ '</name>' }}
49
- {%- if param_fields.type is defined %}
50
- {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
 
 
 
 
 
51
  {%- endif %}
52
- {%- if param_fields.description is defined %}
53
- {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
 
 
 
 
54
  {%- endif %}
55
- {{- render_item_list(param_fields.enum, 'enum') }}
56
- {%- set handled_keys = ['type', 'description', 'enum', 'required'] %}
57
- {%- for json_key in param_fields.keys() | reject("in", handled_keys) %}
58
- {%- set normed_json_key = json_key | replace("-", "_") | replace(" ", "_") | replace("$", "") %}
59
- {%- if param_fields[json_key] is mapping %}
60
- {{- '\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | tojson | safe) ~ '</' ~ normed_json_key ~ '>' }}
 
 
 
 
 
 
 
 
 
 
61
  {%- else %}
62
- {{-'\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | string) ~ '</' ~ normed_json_key ~ '>' }}
63
  {%- endif %}
 
64
  {%- endfor %}
65
- {{- render_item_list(param_fields.required, 'required') }}
66
- {{- '\n</parameter>' }}
67
- {%- endfor %}
68
- {{- render_item_list(tool.parameters.required, 'required') }}
69
- {{- '\n</parameters>' }}
70
- {%- if tool.return is defined %}
71
- {%- if tool.return is mapping %}
72
- {{- '\n<return>' ~ (tool.return | tojson | safe) ~ '</return>' }}
73
- {%- else %}
74
- {{- '\n<return>' ~ (tool.return | string) ~ '</return>' }}
75
- {%- endif %}
76
  {%- endif %}
77
- {{- '\n</function>' }}
78
- {%- endfor %}
79
- {{- "\n</tools>" }}
80
- {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
81
- {%- endif %}
82
- {%- if system_message is defined %}
83
- {{- '<|im_end|>\n' }}
84
- {%- else %}
85
- {%- if tools is iterable and tools | length > 0 %}
86
  {{- '<|im_end|>\n' }}
87
- {%- endif %}
88
- {%- endif %}
89
- {%- for message in loop_messages %}
90
- {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
91
- {{- '<|im_start|>' + message.role }}
92
- {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
93
- {{- '\n' + message.content | trim + '\n' }}
94
- {%- endif %}
95
- {%- for tool_call in message.tool_calls %}
96
- {%- if tool_call.function is defined %}
97
- {%- set tool_call = tool_call.function %}
98
- {%- endif %}
99
- {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
100
- {%- if tool_call.arguments is defined %}
101
- {%- for args_name, args_value in tool_call.arguments|items %}
102
- {{- '<parameter=' + args_name + '>\n' }}
103
- {%- set args_value = args_value if args_value is string else args_value | string %}
104
- {{- args_value }}
105
- {{- '\n</parameter>\n' }}
106
- {%- endfor %}
107
- {%- endif %}
108
- {{- '</function>\n</tool_call>' }}
109
- {%- endfor %}
110
- {{- '<|im_end|>\n' }}
111
- {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
112
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
113
  {%- elif message.role == "tool" %}
114
- {%- if loop.previtem and loop.previtem.role != "tool" %}
115
- {{- '<|im_start|>user\n' }}
116
  {%- endif %}
117
- {{- '<tool_response>\n' }}
118
  {{- message.content }}
119
- {{- '\n</tool_response>\n' }}
120
- {%- if not loop.last and loop.nextitem.role != "tool" %}
121
- {{- '<|im_end|>\n' }}
122
- {%- elif loop.last %}
123
  {{- '<|im_end|>\n' }}
124
  {%- endif %}
125
- {%- else %}
126
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
127
  {%- endif %}
128
  {%- endfor %}
129
  {%- if add_generation_prompt %}
130
  {{- '<|im_start|>assistant\n' }}
131
- {%- endif %}
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
 
 
 
 
 
 
 
 
 
 
5
  {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
 
 
 
 
 
 
 
 
 
12
  {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
  {%- endif %}
16
  {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
  {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
  {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
  {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
  {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
  {%- endfor %}
 
 
 
 
 
 
 
 
 
 
 
66
  {%- endif %}
 
 
 
 
 
 
 
 
 
67
  {{- '<|im_end|>\n' }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
  {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
  {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
 
 
76
  {{- '<|im_end|>\n' }}
77
  {%- endif %}
 
 
78
  {%- endif %}
79
  {%- endfor %}
80
  {%- if add_generation_prompt %}
81
  {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
config.json CHANGED
@@ -1,434 +1,38 @@
1
  {
2
  "architectures": [
3
- "Qwen3MoeForCausalLM"
4
  ],
 
5
  "attention_dropout": 0.0,
6
- "decoder_sparse_step": 1,
7
- "eos_token_id": [
8
- 151645,
9
- 151643
10
- ],
11
  "head_dim": 128,
12
  "hidden_act": "silu",
13
- "hidden_size": 2048,
14
  "initializer_range": 0.02,
15
- "intermediate_size": 5472,
16
- "max_position_embeddings": 262144,
17
- "max_window_layers": 28,
18
- "mlp_only_layers": [],
19
- "model_type": "qwen3_moe",
20
- "moe_intermediate_size": 768,
21
- "norm_topk_prob": true,
22
  "num_attention_heads": 32,
23
- "num_experts": 128,
24
- "num_experts_per_tok": 8,
25
- "num_hidden_layers": 48,
26
- "num_key_value_heads": 4,
27
- "output_router_logits": false,
28
- "qkv_bias": false,
29
  "quantization": {
30
  "group_size": 64,
31
- "bits": 4,
32
- "model.layers.0.mlp.gate": {
33
- "group_size": 64,
34
- "bits": 8
35
- },
36
- "model.layers.1.mlp.gate": {
37
- "group_size": 64,
38
- "bits": 8
39
- },
40
- "model.layers.2.mlp.gate": {
41
- "group_size": 64,
42
- "bits": 8
43
- },
44
- "model.layers.3.mlp.gate": {
45
- "group_size": 64,
46
- "bits": 8
47
- },
48
- "model.layers.4.mlp.gate": {
49
- "group_size": 64,
50
- "bits": 8
51
- },
52
- "model.layers.5.mlp.gate": {
53
- "group_size": 64,
54
- "bits": 8
55
- },
56
- "model.layers.6.mlp.gate": {
57
- "group_size": 64,
58
- "bits": 8
59
- },
60
- "model.layers.7.mlp.gate": {
61
- "group_size": 64,
62
- "bits": 8
63
- },
64
- "model.layers.8.mlp.gate": {
65
- "group_size": 64,
66
- "bits": 8
67
- },
68
- "model.layers.9.mlp.gate": {
69
- "group_size": 64,
70
- "bits": 8
71
- },
72
- "model.layers.10.mlp.gate": {
73
- "group_size": 64,
74
- "bits": 8
75
- },
76
- "model.layers.11.mlp.gate": {
77
- "group_size": 64,
78
- "bits": 8
79
- },
80
- "model.layers.12.mlp.gate": {
81
- "group_size": 64,
82
- "bits": 8
83
- },
84
- "model.layers.13.mlp.gate": {
85
- "group_size": 64,
86
- "bits": 8
87
- },
88
- "model.layers.14.mlp.gate": {
89
- "group_size": 64,
90
- "bits": 8
91
- },
92
- "model.layers.15.mlp.gate": {
93
- "group_size": 64,
94
- "bits": 8
95
- },
96
- "model.layers.16.mlp.gate": {
97
- "group_size": 64,
98
- "bits": 8
99
- },
100
- "model.layers.17.mlp.gate": {
101
- "group_size": 64,
102
- "bits": 8
103
- },
104
- "model.layers.18.mlp.gate": {
105
- "group_size": 64,
106
- "bits": 8
107
- },
108
- "model.layers.19.mlp.gate": {
109
- "group_size": 64,
110
- "bits": 8
111
- },
112
- "model.layers.20.mlp.gate": {
113
- "group_size": 64,
114
- "bits": 8
115
- },
116
- "model.layers.21.mlp.gate": {
117
- "group_size": 64,
118
- "bits": 8
119
- },
120
- "model.layers.22.mlp.gate": {
121
- "group_size": 64,
122
- "bits": 8
123
- },
124
- "model.layers.23.mlp.gate": {
125
- "group_size": 64,
126
- "bits": 8
127
- },
128
- "model.layers.24.mlp.gate": {
129
- "group_size": 64,
130
- "bits": 8
131
- },
132
- "model.layers.25.mlp.gate": {
133
- "group_size": 64,
134
- "bits": 8
135
- },
136
- "model.layers.26.mlp.gate": {
137
- "group_size": 64,
138
- "bits": 8
139
- },
140
- "model.layers.27.mlp.gate": {
141
- "group_size": 64,
142
- "bits": 8
143
- },
144
- "model.layers.28.mlp.gate": {
145
- "group_size": 64,
146
- "bits": 8
147
- },
148
- "model.layers.29.mlp.gate": {
149
- "group_size": 64,
150
- "bits": 8
151
- },
152
- "model.layers.30.mlp.gate": {
153
- "group_size": 64,
154
- "bits": 8
155
- },
156
- "model.layers.31.mlp.gate": {
157
- "group_size": 64,
158
- "bits": 8
159
- },
160
- "model.layers.32.mlp.gate": {
161
- "group_size": 64,
162
- "bits": 8
163
- },
164
- "model.layers.33.mlp.gate": {
165
- "group_size": 64,
166
- "bits": 8
167
- },
168
- "model.layers.34.mlp.gate": {
169
- "group_size": 64,
170
- "bits": 8
171
- },
172
- "model.layers.35.mlp.gate": {
173
- "group_size": 64,
174
- "bits": 8
175
- },
176
- "model.layers.36.mlp.gate": {
177
- "group_size": 64,
178
- "bits": 8
179
- },
180
- "model.layers.37.mlp.gate": {
181
- "group_size": 64,
182
- "bits": 8
183
- },
184
- "model.layers.38.mlp.gate": {
185
- "group_size": 64,
186
- "bits": 8
187
- },
188
- "model.layers.39.mlp.gate": {
189
- "group_size": 64,
190
- "bits": 8
191
- },
192
- "model.layers.40.mlp.gate": {
193
- "group_size": 64,
194
- "bits": 8
195
- },
196
- "model.layers.41.mlp.gate": {
197
- "group_size": 64,
198
- "bits": 8
199
- },
200
- "model.layers.42.mlp.gate": {
201
- "group_size": 64,
202
- "bits": 8
203
- },
204
- "model.layers.43.mlp.gate": {
205
- "group_size": 64,
206
- "bits": 8
207
- },
208
- "model.layers.44.mlp.gate": {
209
- "group_size": 64,
210
- "bits": 8
211
- },
212
- "model.layers.45.mlp.gate": {
213
- "group_size": 64,
214
- "bits": 8
215
- },
216
- "model.layers.46.mlp.gate": {
217
- "group_size": 64,
218
- "bits": 8
219
- },
220
- "model.layers.47.mlp.gate": {
221
- "group_size": 64,
222
- "bits": 8
223
- }
224
  },
225
  "quantization_config": {
226
  "group_size": 64,
227
- "bits": 4,
228
- "model.layers.0.mlp.gate": {
229
- "group_size": 64,
230
- "bits": 8
231
- },
232
- "model.layers.1.mlp.gate": {
233
- "group_size": 64,
234
- "bits": 8
235
- },
236
- "model.layers.2.mlp.gate": {
237
- "group_size": 64,
238
- "bits": 8
239
- },
240
- "model.layers.3.mlp.gate": {
241
- "group_size": 64,
242
- "bits": 8
243
- },
244
- "model.layers.4.mlp.gate": {
245
- "group_size": 64,
246
- "bits": 8
247
- },
248
- "model.layers.5.mlp.gate": {
249
- "group_size": 64,
250
- "bits": 8
251
- },
252
- "model.layers.6.mlp.gate": {
253
- "group_size": 64,
254
- "bits": 8
255
- },
256
- "model.layers.7.mlp.gate": {
257
- "group_size": 64,
258
- "bits": 8
259
- },
260
- "model.layers.8.mlp.gate": {
261
- "group_size": 64,
262
- "bits": 8
263
- },
264
- "model.layers.9.mlp.gate": {
265
- "group_size": 64,
266
- "bits": 8
267
- },
268
- "model.layers.10.mlp.gate": {
269
- "group_size": 64,
270
- "bits": 8
271
- },
272
- "model.layers.11.mlp.gate": {
273
- "group_size": 64,
274
- "bits": 8
275
- },
276
- "model.layers.12.mlp.gate": {
277
- "group_size": 64,
278
- "bits": 8
279
- },
280
- "model.layers.13.mlp.gate": {
281
- "group_size": 64,
282
- "bits": 8
283
- },
284
- "model.layers.14.mlp.gate": {
285
- "group_size": 64,
286
- "bits": 8
287
- },
288
- "model.layers.15.mlp.gate": {
289
- "group_size": 64,
290
- "bits": 8
291
- },
292
- "model.layers.16.mlp.gate": {
293
- "group_size": 64,
294
- "bits": 8
295
- },
296
- "model.layers.17.mlp.gate": {
297
- "group_size": 64,
298
- "bits": 8
299
- },
300
- "model.layers.18.mlp.gate": {
301
- "group_size": 64,
302
- "bits": 8
303
- },
304
- "model.layers.19.mlp.gate": {
305
- "group_size": 64,
306
- "bits": 8
307
- },
308
- "model.layers.20.mlp.gate": {
309
- "group_size": 64,
310
- "bits": 8
311
- },
312
- "model.layers.21.mlp.gate": {
313
- "group_size": 64,
314
- "bits": 8
315
- },
316
- "model.layers.22.mlp.gate": {
317
- "group_size": 64,
318
- "bits": 8
319
- },
320
- "model.layers.23.mlp.gate": {
321
- "group_size": 64,
322
- "bits": 8
323
- },
324
- "model.layers.24.mlp.gate": {
325
- "group_size": 64,
326
- "bits": 8
327
- },
328
- "model.layers.25.mlp.gate": {
329
- "group_size": 64,
330
- "bits": 8
331
- },
332
- "model.layers.26.mlp.gate": {
333
- "group_size": 64,
334
- "bits": 8
335
- },
336
- "model.layers.27.mlp.gate": {
337
- "group_size": 64,
338
- "bits": 8
339
- },
340
- "model.layers.28.mlp.gate": {
341
- "group_size": 64,
342
- "bits": 8
343
- },
344
- "model.layers.29.mlp.gate": {
345
- "group_size": 64,
346
- "bits": 8
347
- },
348
- "model.layers.30.mlp.gate": {
349
- "group_size": 64,
350
- "bits": 8
351
- },
352
- "model.layers.31.mlp.gate": {
353
- "group_size": 64,
354
- "bits": 8
355
- },
356
- "model.layers.32.mlp.gate": {
357
- "group_size": 64,
358
- "bits": 8
359
- },
360
- "model.layers.33.mlp.gate": {
361
- "group_size": 64,
362
- "bits": 8
363
- },
364
- "model.layers.34.mlp.gate": {
365
- "group_size": 64,
366
- "bits": 8
367
- },
368
- "model.layers.35.mlp.gate": {
369
- "group_size": 64,
370
- "bits": 8
371
- },
372
- "model.layers.36.mlp.gate": {
373
- "group_size": 64,
374
- "bits": 8
375
- },
376
- "model.layers.37.mlp.gate": {
377
- "group_size": 64,
378
- "bits": 8
379
- },
380
- "model.layers.38.mlp.gate": {
381
- "group_size": 64,
382
- "bits": 8
383
- },
384
- "model.layers.39.mlp.gate": {
385
- "group_size": 64,
386
- "bits": 8
387
- },
388
- "model.layers.40.mlp.gate": {
389
- "group_size": 64,
390
- "bits": 8
391
- },
392
- "model.layers.41.mlp.gate": {
393
- "group_size": 64,
394
- "bits": 8
395
- },
396
- "model.layers.42.mlp.gate": {
397
- "group_size": 64,
398
- "bits": 8
399
- },
400
- "model.layers.43.mlp.gate": {
401
- "group_size": 64,
402
- "bits": 8
403
- },
404
- "model.layers.44.mlp.gate": {
405
- "group_size": 64,
406
- "bits": 8
407
- },
408
- "model.layers.45.mlp.gate": {
409
- "group_size": 64,
410
- "bits": 8
411
- },
412
- "model.layers.46.mlp.gate": {
413
- "group_size": 64,
414
- "bits": 8
415
- },
416
- "model.layers.47.mlp.gate": {
417
- "group_size": 64,
418
- "bits": 8
419
- }
420
  },
421
  "rms_norm_eps": 1e-06,
422
  "rope_scaling": null,
423
- "rope_theta": 10000000,
424
- "router_aux_loss_coef": 0.0,
425
- "shared_expert_intermediate_size": 0,
426
  "sliding_window": null,
427
  "tie_word_embeddings": false,
428
  "torch_dtype": "bfloat16",
429
- "transformers_version": "4.52.3",
430
  "use_cache": true,
431
- "use_qk_norm": true,
432
  "use_sliding_window": false,
433
  "vocab_size": 151936
434
  }
 
1
  {
2
  "architectures": [
3
+ "Qwen3ForCausalLM"
4
  ],
5
+ "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
 
 
 
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "qwen3",
 
 
 
17
  "num_attention_heads": 32,
18
+ "num_hidden_layers": 36,
19
+ "num_key_value_heads": 8,
 
 
 
 
20
  "quantization": {
21
  "group_size": 64,
22
+ "bits": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  },
24
  "quantization_config": {
25
  "group_size": 64,
26
+ "bits": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  "rms_norm_eps": 1e-06,
29
  "rope_scaling": null,
30
+ "rope_theta": 1000000,
 
 
31
  "sliding_window": null,
32
  "tie_word_embeddings": false,
33
  "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.51.0",
35
  "use_cache": true,
 
36
  "use_sliding_window": false,
37
  "vocab_size": 151936
38
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6795684e67d21c9463e679f00e3f2d079a00f1fd028d6c1cc0a31899f521d918
3
+ size 4607835164
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -6,10 +6,10 @@
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
  "is_local": true,
9
- "model_max_length": 1048576,
10
  "pad_token": "<|endoftext|>",
11
  "split_special_tokens": false,
12
  "tokenizer_class": "Qwen2Tokenizer",
13
- "tool_parser_type": "qwen3_coder",
14
  "unk_token": null
15
  }
 
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
  "is_local": true,
9
+ "model_max_length": 131072,
10
  "pad_token": "<|endoftext|>",
11
  "split_special_tokens": false,
12
  "tokenizer_class": "Qwen2Tokenizer",
13
+ "tool_parser_type": "json_tools",
14
  "unk_token": null
15
  }