Upload 7 files
Browse files
- chat_template.jinja +19 -9
- config.json +7 -7
- model.safetensors +2 -2
chat_template.jinja
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
{%- macro format_parameters(properties, required) -%}
|
| 2 |
{%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
|
| 3 |
{%- set ns = namespace(found_first=false) -%}
|
| 4 |
{%- for key, value in properties | dictsort -%}
|
| 5 |
{%- set add_comma = false -%}
|
| 6 |
-
{%- if key not in standard_keys -%}
|
| 7 |
{%- if ns.found_first %},{% endif -%}
|
| 8 |
{%- set ns.found_first = true -%}
|
| 9 |
{{ key }}:{
|
|
@@ -65,7 +65,7 @@
|
|
| 65 |
{%- elif value is mapping -%}
|
| 66 |
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
|
| 67 |
properties:{
|
| 68 |
-
{{- format_parameters(value, value['required'] | default([])) -}}
|
| 69 |
}
|
| 70 |
{%- endif -%}
|
| 71 |
{%- if value['required'] -%}
|
|
@@ -178,18 +178,21 @@
|
|
| 178 |
{#- Handle System/Tool Definitions Block -#}
|
| 179 |
{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
|
| 180 |
{{- '<|turn>system\n' -}}
|
| 181 |
-
|
| 182 |
{#- Inject Thinking token at the very top of the FIRST system turn -#}
|
| 183 |
{%- if enable_thinking is defined and enable_thinking -%}
|
| 184 |
{{- '<|think|>\n' -}}
|
| 185 |
{%- set ns.prev_message_type = 'think' -%}
|
| 186 |
{%- endif -%}
|
| 187 |
-
|
| 188 |
{%- if messages[0]['role'] in ['system', 'developer'] -%}
|
| 189 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
{%- set loop_messages = messages[1:] -%}
|
| 191 |
{%- endif -%}
|
| 192 |
-
|
| 193 |
{%- if tools -%}
|
| 194 |
{%- for tool in tools %}
|
| 195 |
{{- '<|tool>' -}}
|
|
@@ -198,7 +201,6 @@
|
|
| 198 |
{%- endfor %}
|
| 199 |
{%- set ns.prev_message_type = 'tool' -%}
|
| 200 |
{%- endif -%}
|
| 201 |
-
|
| 202 |
{{- '<turn|>\n' -}}
|
| 203 |
{%- endif %}
|
| 204 |
|
|
@@ -302,6 +304,7 @@
|
|
| 302 |
{%- endfor -%}
|
| 303 |
{%- endif -%}
|
| 304 |
|
|
|
|
| 305 |
{%- if message['content'] is string -%}
|
| 306 |
{%- if role == 'model' -%}
|
| 307 |
{{- strip_thinking(message['content']) -}}
|
|
@@ -328,10 +331,14 @@
|
|
| 328 |
{%- endif -%}
|
| 329 |
{%- endfor -%}
|
| 330 |
{%- endif -%}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
{%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
|
| 333 |
{{- '<|tool_response>' -}}
|
| 334 |
-
{%- elif not (ns_tr_out.flag and not
|
| 335 |
{{- '<turn|>\n' -}}
|
| 336 |
{%- endif -%}
|
| 337 |
{%- endif -%}
|
|
@@ -340,5 +347,8 @@
|
|
| 340 |
{%- if add_generation_prompt -%}
|
| 341 |
{%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
|
| 342 |
{{- '<|turn>model\n' -}}
|
|
|
|
|
|
|
|
|
|
| 343 |
{%- endif -%}
|
| 344 |
{%- endif -%}
|
|
|
|
| 1 |
+
{%- macro format_parameters(properties, required, filter_keys=false) -%}
|
| 2 |
{%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
|
| 3 |
{%- set ns = namespace(found_first=false) -%}
|
| 4 |
{%- for key, value in properties | dictsort -%}
|
| 5 |
{%- set add_comma = false -%}
|
| 6 |
+
{%- if not filter_keys or key not in standard_keys -%}
|
| 7 |
{%- if ns.found_first %},{% endif -%}
|
| 8 |
{%- set ns.found_first = true -%}
|
| 9 |
{{ key }}:{
|
|
|
|
| 65 |
{%- elif value is mapping -%}
|
| 66 |
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
|
| 67 |
properties:{
|
| 68 |
+
{{- format_parameters(value, value['required'] | default([]), filter_keys=true) -}}
|
| 69 |
}
|
| 70 |
{%- endif -%}
|
| 71 |
{%- if value['required'] -%}
|
|
|
|
| 178 |
{#- Handle System/Tool Definitions Block -#}
|
| 179 |
{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
|
| 180 |
{{- '<|turn>system\n' -}}
|
|
|
|
| 181 |
{#- Inject Thinking token at the very top of the FIRST system turn -#}
|
| 182 |
{%- if enable_thinking is defined and enable_thinking -%}
|
| 183 |
{{- '<|think|>\n' -}}
|
| 184 |
{%- set ns.prev_message_type = 'think' -%}
|
| 185 |
{%- endif -%}
|
|
|
|
| 186 |
{%- if messages[0]['role'] in ['system', 'developer'] -%}
|
| 187 |
+
{%- if messages[0]['content'] is string -%}
|
| 188 |
+
{{- messages[0]['content'] | trim -}}
|
| 189 |
+
{%- elif messages[0]['content'] is sequence -%}
|
| 190 |
+
{%- for item in messages[0]['content'] -%}
|
| 191 |
+
{{- item['text'] | trim + ' '-}}
|
| 192 |
+
{%- endfor -%}
|
| 193 |
+
{%- endif -%}
|
| 194 |
{%- set loop_messages = messages[1:] -%}
|
| 195 |
{%- endif -%}
|
|
|
|
| 196 |
{%- if tools -%}
|
| 197 |
{%- for tool in tools %}
|
| 198 |
{{- '<|tool>' -}}
|
|
|
|
| 201 |
{%- endfor %}
|
| 202 |
{%- set ns.prev_message_type = 'tool' -%}
|
| 203 |
{%- endif -%}
|
|
|
|
| 204 |
{{- '<turn|>\n' -}}
|
| 205 |
{%- endif %}
|
| 206 |
|
|
|
|
| 304 |
{%- endfor -%}
|
| 305 |
{%- endif -%}
|
| 306 |
|
| 307 |
+
{%- set captured_content -%}
|
| 308 |
{%- if message['content'] is string -%}
|
| 309 |
{%- if role == 'model' -%}
|
| 310 |
{{- strip_thinking(message['content']) -}}
|
|
|
|
| 331 |
{%- endif -%}
|
| 332 |
{%- endfor -%}
|
| 333 |
{%- endif -%}
|
| 334 |
+
{%- endset -%}
|
| 335 |
+
|
| 336 |
+
{{- captured_content -}}
|
| 337 |
+
{%- set has_content = captured_content | trim | length > 0 -%}
|
| 338 |
|
| 339 |
{%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
|
| 340 |
{{- '<|tool_response>' -}}
|
| 341 |
+
{%- elif not (ns_tr_out.flag and not has_content) -%}
|
| 342 |
{{- '<turn|>\n' -}}
|
| 343 |
{%- endif -%}
|
| 344 |
{%- endif -%}
|
|
|
|
| 347 |
{%- if add_generation_prompt -%}
|
| 348 |
{%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
|
| 349 |
{{- '<|turn>model\n' -}}
|
| 350 |
+
{%- if not enable_thinking | default(false) -%}
|
| 351 |
+
{{- '<|channel>thought\n<channel|>' -}}
|
| 352 |
+
{%- endif -%}
|
| 353 |
{%- endif -%}
|
| 354 |
{%- endif -%}
|
config.json
CHANGED
|
@@ -29,22 +29,22 @@
|
|
| 29 |
"global_head_dim": 4,
|
| 30 |
"head_dim": 4,
|
| 31 |
"hidden_activation": "gelu_pytorch_tanh",
|
| 32 |
-
"hidden_size":
|
| 33 |
"hidden_size_per_layer_input": 0,
|
| 34 |
"initializer_range": 0.02,
|
| 35 |
-
"intermediate_size":
|
| 36 |
"layer_types": [
|
| 37 |
"sliding_attention",
|
| 38 |
"full_attention"
|
| 39 |
],
|
| 40 |
"max_position_embeddings": 262144,
|
| 41 |
"model_type": "gemma4_text",
|
| 42 |
-
"moe_intermediate_size":
|
| 43 |
-
"num_attention_heads":
|
| 44 |
-
"num_experts":
|
| 45 |
"num_global_key_value_heads": 2,
|
| 46 |
"num_hidden_layers": 2,
|
| 47 |
-
"num_key_value_heads":
|
| 48 |
"num_kv_shared_layers": 0,
|
| 49 |
"pad_token_id": 0,
|
| 50 |
"rms_norm_eps": 1e-06,
|
|
@@ -61,7 +61,7 @@
|
|
| 61 |
},
|
| 62 |
"sliding_window": 1024,
|
| 63 |
"tie_word_embeddings": true,
|
| 64 |
-
"top_k_experts":
|
| 65 |
"use_bidirectional_attention": "vision",
|
| 66 |
"use_cache": true,
|
| 67 |
"use_double_wide_mlp": false,
|
|
|
|
| 29 |
"global_head_dim": 4,
|
| 30 |
"head_dim": 4,
|
| 31 |
"hidden_activation": "gelu_pytorch_tanh",
|
| 32 |
+
"hidden_size": 32,
|
| 33 |
"hidden_size_per_layer_input": 0,
|
| 34 |
"initializer_range": 0.02,
|
| 35 |
+
"intermediate_size": 64,
|
| 36 |
"layer_types": [
|
| 37 |
"sliding_attention",
|
| 38 |
"full_attention"
|
| 39 |
],
|
| 40 |
"max_position_embeddings": 262144,
|
| 41 |
"model_type": "gemma4_text",
|
| 42 |
+
"moe_intermediate_size": 64,
|
| 43 |
+
"num_attention_heads": 4,
|
| 44 |
+
"num_experts": 4,
|
| 45 |
"num_global_key_value_heads": 2,
|
| 46 |
"num_hidden_layers": 2,
|
| 47 |
+
"num_key_value_heads": 2,
|
| 48 |
"num_kv_shared_layers": 0,
|
| 49 |
"pad_token_id": 0,
|
| 50 |
"rms_norm_eps": 1e-06,
|
|
|
|
| 61 |
},
|
| 62 |
"sliding_window": 1024,
|
| 63 |
"tie_word_embeddings": true,
|
| 64 |
+
"top_k_experts": 2,
|
| 65 |
"use_bidirectional_attention": "vision",
|
| 66 |
"use_cache": true,
|
| 67 |
"use_double_wide_mlp": false,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4492347ed73c791a7ae27b42ef4380ec85e3123aa341e8c23f72b20acf3dcc12
|
| 3 |
+
size 34167464
|