Hunterx committed on
Commit
c4c8357
·
verified ·
1 Parent(s): 0f27d83

Upload kimi_k2.6_fixed_template_v2.jinja

Browse files
Files changed (1) hide show
  1. kimi_k2.6_fixed_template_v2.jinja +270 -0
kimi_k2.6_fixed_template_v2.jinja ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# =========================
2
+ Fixed Kimi K2.6 Chat Template — v2 (Generic Tool Call Format)
3
+ Based on original Kimi K2.6 template with improvements merged from:
4
+ - https://gist.github.com/fakezeta/9e8e039c60332fcb143c6e805558afe0
5
+ - https://github.com/allanchan339/vLLM-Qwen3-3.5-3.6-chat-template-fix
6
+ - https://huggingface.co/froggeric/Qwen-Fixed-Chat-Templates
7
+ - ubergarm's "Qwen3.6-like" approach for llama.cpp compatibility
8
+
9
+ Fixes applied by Hunterx (https://huggingface.co/Hunterx):
10
+ v1 fixes (retained):
11
+ - Auto-close <think> before tool calls
12
+ - Strict tool calling instructions in system prompt
13
+ - Missing function name in tool call rendering
14
+ - Replaced .get() Python dict syntax with Jinja-native attribute access
15
+ - Recognizes both </think> and </thinking> as valid close tags
16
+ - <|think_on|> / <|think_off|> toggles
17
+ - developer role support
18
+ - LM Studio / llama.cpp Jinja runtime compatible
19
+
20
+ v2 changes:
21
+ - CRITICAL: Tool calls now output generic <tool_call> JSON format instead of
22
+ Kimi's native <|tool_call_begin|> special tokens. This allows llama.cpp,
23
+ ik_llama.cpp, oMLX, and LM Studio's generic tool parsers to detect and
24
+ route tool calls properly. The native tokens were being stripped or
25
+ ignored by every inference engine except vLLM.
26
+ - Tool call format in system prompt updated to match generic output
27
+ - Tool responses use generic <tool_response> tags for consistency
28
+ - String-form tool arguments are emitted verbatim as pre-serialized JSON (from Qwen fix pattern)
29
+ ========================= #}
30
+
31
+ {#- Content rendering macro: render_content(message) flattens message.content into prompt text. A plain string passes through unchanged; otherwise each part is mapped by its type (image/image_url -> image placeholder, video/video_url -> video placeholder, text -> its text) and parts of any other type are skipped. -#}
32
+ {%- macro render_content(message) -%}
33
+ {%- set body = message.content -%}
34
+ {%- if body is string -%}
35
+ {{- body -}}
36
+ {%- elif body is not none -%}
37
+ {%- for part in body -%}
38
+ {%- if part.type in ['image', 'image_url'] -%}
39
+ <|media_begin|>image<|media_content|><|media_pad|><|media_end|>
40
+ {%- elif part.type in ['video', 'video_url'] -%}
41
+ <|kimi_k25_video_placeholder|>
42
+ {%- elif part.type == 'text' -%}
43
+ {{- part.text -}}
44
+ {%- endif -%}{%- endfor -%}
45
+ {%- endif -%}
46
+ {%- endmacro -%}
47
+
48
+ {#- Role header macro: set_roles(message) emits the opening token, the speaker name and <|im_middle|>.
49
+    The speaker name is message.name when it is defined and non-empty, otherwise message.role.
50
+    user and assistant have dedicated opening tokens; every other role
51
+    (developer, system, tool, ...) is opened with <|im_system|>. -#}
52
+ {%- macro set_roles(message) -%}
53
+ {%- set speaker = message.role -%}
54
+ {%- if message.name is defined and message.name -%}
55
+ {%- set speaker = message.name -%}
56
+ {%- endif -%}
57
+ {%- set opener = '<|im_system|>' -%}
58
+ {%- if message.role == 'user' -%}
59
+ {%- set opener = '<|im_user|>' -%}
60
+ {%- elif message.role == 'assistant' -%}
61
+ {%- set opener = '<|im_assistant|>' -%}
62
+ {%- endif -%}
63
+ {{- opener -}}{{- speaker -}}<|im_middle|>
64
+ {%- endmacro -%}
65
+
66
+ {#- Tool call rendering — GENERIC FORMAT for llama.cpp/oMLX/LM Studio compatibility.
67
+    render_toolcalls(message) emits one <tool_call> JSON block per entry of message.tool_calls.
68
+    Both the nested shape (name/arguments under .function) and the flat shape (name/arguments on the call itself) are accepted.
69
+    The name is serialized with tojson so quotes or backslashes in it cannot break the JSON object.
70
+    Arguments: a non-blank string is emitted verbatim (assumed to already be serialized JSON); a blank, missing or none value becomes an empty object; anything else is serialized with tojson. -#}
71
+ {%- macro render_toolcalls(message) -%}
72
+ {%- for tool_call in message.tool_calls -%}
73
+ {%- if tool_call.function is defined -%}
74
+ {%- set fn = tool_call.function -%}
75
+ {%- else -%}
76
+ {%- set fn = tool_call -%}
77
+ {%- endif -%}
78
+ {#- The generic block has no id field, so tool_call.id is deliberately not read or rendered here. -#}
79
+
80
+ <tool_call>
81
+ {"name": {{ fn.name | default('') | tojson }}, "arguments": {% if fn.arguments is string and (fn.arguments | trim) %}{{ fn.arguments }}{% elif fn.arguments is defined and fn.arguments is not none and fn.arguments is not string %}{{ fn.arguments | tojson }}{% else %}{}{% endif %}}
82
+ </tool_call>
83
+ {%- endfor -%}
84
+ {%- endmacro -%}
85
+
86
+ {#- Thinking-toggle flag state. ns_flags.enable_thinking starts as true; a caller-supplied
87
+    enable_thinking takes precedence over the thinking alias when both are passed. -#}
88
+ {%- set ns_flags = namespace(enable_thinking=true) -%}
89
+ {%- if enable_thinking is defined -%}
90
+ {%- set ns_flags.enable_thinking = enable_thinking -%}
91
+ {%- elif thinking is defined -%}
92
+ {%- set ns_flags.enable_thinking = thinking -%}
93
+ {%- endif -%}
94
+ {#- preserve_thinking falls back to false when the caller does not pass it. -#}
95
+ {%- set preserve_thinking = preserve_thinking | default(false) -%}
96
+ {#- A missing or empty conversation is rejected before anything is rendered. -#}
97
+ {%- if not messages -%}
98
+ {{- raise_exception('No messages provided.') -}}
99
+ {%- endif -%}
100
+
101
+ {#- Find the history/suffix boundary: scanning backwards, the newest assistant message that
102
+    carries no tool calls (tool_calls missing or empty). The index stays at -1 when there is no such
103
+    message, and the scan is skipped entirely when preserve_thinking is set. -#}
104
+ {%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
105
+ {%- if not preserve_thinking -%}
106
+ {%- for idx in range(messages | length - 1, -1, -1) -%}
107
+ {%- set candidate = messages[idx] -%}
108
+ {%- if candidate.role == 'assistant' and not (candidate.tool_calls is defined and candidate.tool_calls) -%}
109
+ {%- set ns.last_non_tool_call_assistant_msg = idx -%}
110
+ {%- break -%}
111
+ {%- endif -%}
112
+ {%- endfor -%}
113
+ {%- endif -%}
114
+ {#- Messages up to and including that index form the history (reasoning stripped);
115
+    everything after it forms the suffix (reasoning preserved). -#}
116
+ {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg + 1] -%}
117
+ {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg + 1:] -%}
118
+
119
+ {#- Render tool declarations with strict instructions: a single <|im_system|>tool_declare turn holding the declarations (tools_ts_str when it is provided and non-empty, otherwise compact JSON of tools), then a blank line, then the calling rules. Nothing is rendered when tools is empty or missing. -#}
120
+ {%- if tools -%}
121
+ {%- if tools_ts_str is defined and tools_ts_str -%}
122
+ <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}
123
+ {%- else -%}
124
+ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}
125
+ {%- endif -%}
126
+ {{- '\n\n' -}}{#- explicit separator: the whitespace control on the endif above trims the blank line, which would glue IMPORTANT directly onto the declarations -#}
127
+ IMPORTANT: You MUST always close your thinking with </think> before emitting any tool call. Reasoning inside a tool call or its parameters is strictly forbidden and will cause a fatal error.
128
+
129
+ When you need to call a tool, output one or more JSON blocks in exactly this format and nothing after the final </tool_call>:
130
+
131
+ <tool_call>
132
+ {"name": "example_function_name", "arguments": {"param1": "value1", "param2": "value2"}}
133
+ </tool_call>
134
+
135
+ Rules:
136
+ - If a suitable tool exists for the user request, use it instead of answering from memory.
137
+ - If the user is modifying an existing object and a prior tool response contains its id, prefer the matching update tool and reuse that id.
138
+ - Put any reasoning or natural language BEFORE the first <tool_call>, never after the last </tool_call>.
139
+ - Include every required parameter.
140
+ - Preserve user-provided string values verbatim when possible for ids, names, titles, subjects, emails, and search queries.
141
+ - For object or array parameter values, write valid JSON.
142
+ - If no tool is needed, answer normally.<|im_end|>
143
+ {%- endif -%}
144
+
145
+ {#- ======== Render history messages (reasoning stripped) ========
146
+    Every message is emitted as: role header, role-specific body, <|im_end|>. Think toggles found in the
147
+    text of any message update ns_flags.enable_thinking and are removed from that text. -#}
148
+ {%- for message in hist_msgs -%}
149
+ {%- set content = render_content(message) | trim -%}
150
+ {#- Handle think toggles -#}
151
+ {%- if '<|think_off|>' in content -%}
152
+ {%- set ns_flags.enable_thinking = false -%}
153
+ {%- set content = content.replace('<|think_off|>', '') -%}
154
+ {%- endif -%}
155
+ {%- if '<|think_on|>' in content -%}
156
+ {%- set ns_flags.enable_thinking = true -%}
157
+ {%- set content = content.replace('<|think_on|>', '') -%}
158
+ {%- endif -%}
159
+ {%- set content = content | trim -%}
160
+ {{ set_roles(message) }}
161
+ {%- if message.role == 'assistant' -%}
162
+ {#- Strip reasoning from history: when the content still carries an inline think block, keep only the
163
+    text after its last closing tag (</think> preferred, </thinking> accepted), then emit an empty think block -#}
164
+ {%- if '</think>' in content -%}
165
+ {%- set content = content.split('</think>')[-1] | trim -%}
166
+ {%- elif '</thinking>' in content -%}
167
+ {%- set content = content.split('</thinking>')[-1] | trim -%}
168
+ {%- endif -%}
169
+ <think></think>{{ content }}
170
+ {%- if message.tool_calls is defined and message.tool_calls -%}
171
+ {{ render_toolcalls(message) }}
172
+ {%- endif -%}
173
+ {%- elif message.role == 'tool' -%}
174
+ {#- message.tool_call_id is not part of the generic tool_response block, so it is not rendered -#}
175
+ <tool_response>
176
+ {{ content }}
177
+ </tool_response>
178
+ {%- else -%}
179
+ {{ content }}
180
+ {%- endif -%}
181
+ <|im_end|>
182
+ {%- endfor -%}
183
+
184
+ {#- ======== Render suffix messages (reasoning preserved) ======== -#}
185
+ {%- for message in suffix_msgs -%}
186
+ {%- set content = render_content(message) | trim -%}
187
+ {#- content is the flattened, trimmed text of this message; the steps below may rewrite it before it is emitted. -#}
188
+ {#- Handle think toggles: a think_off or think_on marker anywhere in the text flips ns_flags.enable_thinking (think_on is applied last, so it wins when both appear) and is removed from the text. -#}
189
+ {%- if '<|think_off|>' in content -%}
190
+ {%- set ns_flags.enable_thinking = false -%}
191
+ {%- set content = content.replace('<|think_off|>', '') -%}
192
+ {%- endif -%}
193
+ {%- if '<|think_on|>' in content -%}
194
+ {%- set ns_flags.enable_thinking = true -%}
195
+ {%- set content = content.replace('<|think_on|>', '') -%}
196
+ {%- endif -%}
197
+ {%- set content = content | trim -%}
198
+ {#- Role header first, then a role-specific body, then the end-of-turn token. NOTE(review): in the tool branch tool_call_id is assigned but never rendered. -#}
199
+ {{ set_roles(message) }}
200
+ {%- if message.role == 'assistant' -%}
201
+ {#- Auto-close unclosed <think> before tool calls: only for assistant messages that have tool calls and contain an opening tag; if no closing tag (</think> or </thinking>) occurs after the last opening tag, </think> is appended to the content. -#}
202
+ {%- if message.tool_calls is defined and message.tool_calls and '<think>' in content -%}
203
+ {%- set last_think = content.rfind('<think>') -%}
204
+ {%- set last_close_think = content.rfind('</think>') -%}
205
+ {%- set last_close_thinking = content.rfind('</thinking>') -%}
206
+ {%- set last_close = last_close_think if last_close_think > last_close_thinking else last_close_thinking -%}
207
+ {%- if last_close < last_think or last_close == -1 -%}
208
+ {%- set content = content + '</think>' -%}
209
+ {%- endif -%}
210
+ {%- endif -%}
211
+ {#- NOTE(review): with thinking disabled the content is emitted as-is after an empty think block, so any inline think tags in it are kept; confirm that is intended. -#}
212
+ {#- Extract reasoning content (recognize both </think> and </thinking>). Source priority: message.reasoning, then message.reasoning_content (each only when it is a string), then think tags found inside the content. -#}
213
+ {%- if ns_flags.enable_thinking is false -%}
214
+ <think></think>{{ content }}
215
+ {%- else -%}
216
+ {%- set reasoning_content = '' -%}
217
+ {%- if message.reasoning is defined and message.reasoning is string -%}
218
+ {%- set reasoning_content = message.reasoning -%}
219
+ {%- elif message.reasoning_content is defined and message.reasoning_content is string -%}
220
+ {%- set reasoning_content = message.reasoning_content -%}
221
+ {%- else -%}
222
+ {#- Try to extract from content: with a closing tag present, reasoning is the text between the last opening tag that precedes the first closing tag and that closing tag, and content becomes the text after the last closing tag; with only an opening tag, everything after the last opening tag is reasoning and content becomes empty. -#}
223
+ {%- set has_think_tag = false -%}
224
+ {%- set think_end_token = '</think>' -%}
225
+ {%- if '</think>' in content -%}
226
+ {%- set has_think_tag = true -%}
227
+ {%- elif '</thinking>' in content -%}
228
+ {%- set has_think_tag = true -%}
229
+ {%- set think_end_token = '</thinking>' -%}
230
+ {%- elif '<think>' in content -%}
231
+ {%- set reasoning_content = content.split('<think>')[-1].lstrip('\n') -%}
232
+ {%- set content = '' -%}
233
+ {%- endif -%}
234
+ {%- if has_think_tag -%}
235
+ {%- set reasoning_content = content.split(think_end_token)[0].rstrip('\n').split('<think>')[-1].lstrip('\n') -%}
236
+ {%- set content = content.split(think_end_token)[-1].lstrip('\n') -%}
237
+ {%- endif -%}
238
+ {%- endif -%}
239
+ <think>{{ reasoning_content }}</think>{{ content }}
240
+ {%- endif -%}
241
+ {%- if message.tool_calls is defined and message.tool_calls -%}
242
+ {{ render_toolcalls(message) }}
243
+ {%- endif -%}
244
+ {%- elif message.role == 'tool' -%}
245
+ {%- if message.tool_call_id is defined -%}
246
+ {%- set tool_call_id = message.tool_call_id -%}
247
+ {%- else -%}
248
+ {%- set tool_call_id = 'unknown' -%}
249
+ {%- endif -%}
250
+ <tool_response>
251
+ {{ content }}
252
+ </tool_response>
253
+ {%- elif message.role == 'system' or message.role == 'developer' -%}
254
+ {{ content }}
255
+ {%- elif message.content is not none -%}
256
+ {{ content }}
257
+ {%- endif -%}
258
+ <|im_end|>
259
+ {%- endfor -%}
260
+
261
+ {#- ======== Generation prompt ========
262
+    When add_generation_prompt is set, open an assistant turn. With thinking disabled the turn starts with an
263
+    already-closed empty think block; otherwise the think tag is left open for the model to continue.
264
+    No literal text may follow the final endif: anything placed there is appended to every rendered prompt. -#}
265
+ {%- if add_generation_prompt -%}
266
+ <|im_assistant|>assistant<|im_middle|>
267
+ {%- if ns_flags.enable_thinking is false -%}
268
+ <think></think>
269
+ {%- else -%}
270
+ <think>{%- endif -%}{%- endif -%}