fix: chat template — null handling, reasoning preservation, turn-tag balance, input validation

#35
by lucianommartins - opened
Files changed (1) hide show
  1. chat_template.jinja +65 -41
chat_template.jinja CHANGED
@@ -116,7 +116,9 @@
116
  }
117
  {%- endmacro -%}
118
  {%- macro format_argument(argument, escape_keys=True) -%}
119
- {%- if argument is string -%}
 
 
120
  {{- '<|"|>' + argument + '<|"|>' -}}
121
  {%- elif argument is boolean -%}
122
  {{- 'true' if argument else 'false' -}}
@@ -172,18 +174,21 @@
172
  {{- '<tool_response|>' -}}
173
  {%- endmacro -%}
174
 
175
- {%- set ns = namespace(prev_message_type=None) -%}
 
176
  {%- set loop_messages = messages -%}
 
 
177
  {{- bos_token -}}
178
  {#- Handle System/Tool Definitions Block -#}
179
- {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
  {{- '<|turn>system\n' -}}
181
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
182
- {%- if enable_thinking is defined and enable_thinking -%}
183
  {{- '<|think|>\n' -}}
184
  {%- set ns.prev_message_type = 'think' -%}
185
  {%- endif -%}
186
- {%- if messages[0]['role'] in ['system', 'developer'] -%}
187
  {%- if messages[0]['content'] is string -%}
188
  {{- messages[0]['content'] | trim -}}
189
  {%- elif messages[0]['content'] is sequence -%}
@@ -217,31 +222,21 @@
217
  {%- if message['role'] != 'tool' -%}
218
  {%- set ns.prev_message_type = None -%}
219
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
220
- {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
221
- {%- set prev_nt = namespace(role=None, found=false) -%}
222
- {%- if loop.index0 > 0 -%}
223
- {%- for j in range(loop.index0 - 1, -1, -1) -%}
224
- {%- if not prev_nt.found -%}
225
- {%- if loop_messages[j]['role'] != 'tool' -%}
226
- {%- set prev_nt.role = loop_messages[j]['role'] -%}
227
- {%- set prev_nt.found = true -%}
228
- {%- endif -%}
229
- {%- endif -%}
230
- {%- endfor -%}
231
- {%- endif -%}
232
- {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
233
  {%- if not continue_same_model_turn -%}
234
  {{- '<|turn>' + role + '\n' }}
235
  {%- endif -%}
236
 
237
- {#- Render reasoning/reasoning_content as thinking channel -#}
238
  {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
239
- {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
 
240
  {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
241
  {%- endif -%}
242
 
243
- {%- if message['tool_calls'] -%}
244
- {%- for tool_call in message['tool_calls'] -%}
245
  {%- set function = tool_call['function'] -%}
246
  {{- '<|tool_call>call:' + function['name'] + '{' -}}
247
  {%- if function['arguments'] is mapping -%}
@@ -251,8 +246,13 @@
251
  {%- set ns_args.found_first = true -%}
252
  {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
253
  {%- endfor -%}
254
- {%- elif function['arguments'] is string -%}
255
- {{- function['arguments'] -}}
 
 
 
 
 
256
  {%- endif -%}
257
  {{- '}<tool_call|>' -}}
258
  {%- endfor -%}
@@ -262,8 +262,8 @@
262
  {%- set ns_tr_out = namespace(flag=false) -%}
263
  {%- if message.get('tool_responses') -%}
264
  {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
265
- {%- for tool_response in message['tool_responses'] -%}
266
- {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
267
  {%- set ns_tr_out.flag = true -%}
268
  {%- set ns.prev_message_type = 'tool_response' -%}
269
  {%- endfor -%}
@@ -277,8 +277,8 @@
277
  {%- else -%}
278
  {%- set follow = loop_messages[k] -%}
279
  {#- Resolve tool_call_id to function name -#}
280
- {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
281
- {%- for tc in message['tool_calls'] -%}
282
  {%- if tc.get('id') == follow.get('tool_call_id') -%}
283
  {%- set ns_tname.name = tc['function']['name'] -%}
284
  {%- endif -%}
@@ -296,9 +296,9 @@
296
  {%- endfor -%}
297
  {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
298
  {%- for part in tool_body -%}
299
- {%- if part.get('type') == 'image' -%}
300
  {{- '<|image|>' -}}
301
- {%- elif part.get('type') == 'audio' -%}
302
  {{- '<|audio|>' -}}
303
  {%- elif part.get('type') == 'video' -%}
304
  {{- '<|video|>' -}}
@@ -314,29 +314,26 @@
314
  {%- endif -%}
315
 
316
  {%- set captured_content -%}
317
- {%- if message['content'] is string -%}
318
  {%- if role == 'model' -%}
319
  {{- strip_thinking(message['content']) -}}
320
  {%- else -%}
321
  {{- message['content'] | trim -}}
322
  {%- endif -%}
323
- {%- elif message['content'] is sequence -%}
324
  {%- for item in message['content'] -%}
325
- {%- if item['type'] == 'text' -%}
326
  {%- if role == 'model' -%}
327
  {{- strip_thinking(item['text']) -}}
328
  {%- else -%}
329
  {{- item['text'] | trim -}}
330
  {%- endif -%}
331
- {%- elif item['type'] == 'image' -%}
332
  {{- '<|image|>' -}}
333
- {%- set ns.prev_message_type = 'image' -%}
334
- {%- elif item['type'] == 'audio' -%}
335
  {{- '<|audio|>' -}}
336
- {%- set ns.prev_message_type = 'audio' -%}
337
- {%- elif item['type'] == 'video' -%}
338
  {{- '<|video|>' -}}
339
- {%- set ns.prev_message_type = 'video' -%}
340
  {%- endif -%}
341
  {%- endfor -%}
342
  {%- endif -%}
@@ -345,19 +342,46 @@
345
  {{- captured_content -}}
346
  {%- set has_content = captured_content | trim | length > 0 -%}
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
349
  {{- '<|tool_response>' -}}
 
 
350
  {%- elif not (ns_tr_out.flag and not has_content) -%}
351
  {{- '<turn|>\n' -}}
352
  {%- endif -%}
 
 
 
353
  {%- endif -%}
354
  {%- endfor -%}
355
 
356
  {%- if add_generation_prompt -%}
357
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
358
  {{- '<|turn>model\n' -}}
359
- {%- if not enable_thinking | default(false) -%}
 
 
 
 
360
  {{- '<|channel>thought\n<channel|>' -}}
361
  {%- endif -%}
362
  {%- endif -%}
363
- {%- endif -%}
 
116
  }
117
  {%- endmacro -%}
118
  {%- macro format_argument(argument, escape_keys=True) -%}
119
+ {%- if argument is none -%}
120
+ {{- 'null' -}}
121
+ {%- elif argument is string -%}
122
  {{- '<|"|>' + argument + '<|"|>' -}}
123
  {%- elif argument is boolean -%}
124
  {{- 'true' if argument else 'false' -}}
 
174
  {{- '<tool_response|>' -}}
175
  {%- endmacro -%}
176
 
177
+ {#- ===== SETUP ===== -#}
178
+ {%- set ns = namespace(prev_message_type=None, prev_non_tool_role=None) -%}
179
  {%- set loop_messages = messages -%}
180
+ {%- set enable_thinking = enable_thinking | default(false) -%}
181
+ {%- set preserve_thinking = preserve_thinking | default(true) -%}
182
  {{- bos_token -}}
183
  {#- Handle System/Tool Definitions Block -#}
184
+ {%- if enable_thinking or tools or (messages and messages[0]['role'] in ['system', 'developer']) -%}
185
  {{- '<|turn>system\n' -}}
186
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
187
+ {%- if enable_thinking -%}
188
  {{- '<|think|>\n' -}}
189
  {%- set ns.prev_message_type = 'think' -%}
190
  {%- endif -%}
191
+ {%- if messages and messages[0]['role'] in ['system', 'developer'] -%}
192
  {%- if messages[0]['content'] is string -%}
193
  {{- messages[0]['content'] | trim -}}
194
  {%- elif messages[0]['content'] is sequence -%}
 
222
  {%- if message['role'] != 'tool' -%}
223
  {%- set ns.prev_message_type = None -%}
224
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
225
+ {#- Detect continuation from tracked state (an O(1) lookup of ns.prev_non_tool_role) instead of an O(n) backward scan -#}
226
+ {%- set continue_same_model_turn = (role == 'model' and ns.prev_non_tool_role == 'assistant') -%}
 
 
 
 
 
 
 
 
 
 
 
227
  {%- if not continue_same_model_turn -%}
228
  {{- '<|turn>' + role + '\n' }}
229
  {%- endif -%}
230
 
231
+ {#- Render reasoning/reasoning_content as thinking channel (tool-call turns only) -#}
232
  {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
233
+ {%- set thinking_gate = (loop.index0 > ns_turn.last_user_idx) or preserve_thinking -%}
234
+ {%- if thinking_text and thinking_gate and message.get('tool_calls') -%}
235
  {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
236
  {%- endif -%}
237
 
238
+ {%- if message.get('tool_calls') -%}
239
+ {%- for tool_call in message.get('tool_calls') -%}
240
  {%- set function = tool_call['function'] -%}
241
  {{- '<|tool_call>call:' + function['name'] + '{' -}}
242
  {%- if function['arguments'] is mapping -%}
 
246
  {%- set ns_args.found_first = true -%}
247
  {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
248
  {%- endfor -%}
249
+ {%- elif function['arguments'] is none -%}
250
+ {%- else -%}
251
+ {{- raise_exception(
252
+ "chat_template: tool_calls[].function.arguments must be a "
253
+ "JSON object (mapping), not a string. Deserialize arguments "
254
+ "before passing to the template."
255
+ ) -}}
256
  {%- endif -%}
257
  {{- '}<tool_call|>' -}}
258
  {%- endfor -%}
 
262
  {%- set ns_tr_out = namespace(flag=false) -%}
263
  {%- if message.get('tool_responses') -%}
264
  {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
265
+ {%- for tool_response in message.get('tool_responses') -%}
266
+ {{- format_tool_response_block(tool_response['name'] | default('unknown', true), tool_response['response']) -}}
267
  {%- set ns_tr_out.flag = true -%}
268
  {%- set ns.prev_message_type = 'tool_response' -%}
269
  {%- endfor -%}
 
277
  {%- else -%}
278
  {%- set follow = loop_messages[k] -%}
279
  {#- Resolve tool_call_id to function name -#}
280
+ {%- set ns_tname = namespace(name=follow.get('name') or 'unknown') -%}
281
+ {%- for tc in message.get('tool_calls') -%}
282
  {%- if tc.get('id') == follow.get('tool_call_id') -%}
283
  {%- set ns_tname.name = tc['function']['name'] -%}
284
  {%- endif -%}
 
296
  {%- endfor -%}
297
  {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
298
  {%- for part in tool_body -%}
299
+ {%- if part.get('type') in ['image', 'image_url'] -%}
300
  {{- '<|image|>' -}}
301
+ {%- elif part.get('type') in ['audio', 'input_audio'] -%}
302
  {{- '<|audio|>' -}}
303
  {%- elif part.get('type') == 'video' -%}
304
  {{- '<|video|>' -}}
 
314
  {%- endif -%}
315
 
316
  {%- set captured_content -%}
317
+ {%- if message.get('content') is string -%}
318
  {%- if role == 'model' -%}
319
  {{- strip_thinking(message['content']) -}}
320
  {%- else -%}
321
  {{- message['content'] | trim -}}
322
  {%- endif -%}
323
+ {%- elif message.get('content') is sequence -%}
324
  {%- for item in message['content'] -%}
325
+ {%- if item.get('type') == 'text' -%}
326
  {%- if role == 'model' -%}
327
  {{- strip_thinking(item['text']) -}}
328
  {%- else -%}
329
  {{- item['text'] | trim -}}
330
  {%- endif -%}
331
+ {%- elif item.get('type') in ['image', 'image_url'] -%}
332
  {{- '<|image|>' -}}
333
+ {%- elif item.get('type') in ['audio', 'input_audio'] -%}
 
334
  {{- '<|audio|>' -}}
335
+ {%- elif item.get('type') == 'video' -%}
 
336
  {{- '<|video|>' -}}
 
337
  {%- endif -%}
338
  {%- endfor -%}
339
  {%- endif -%}
 
342
  {{- captured_content -}}
343
  {%- set has_content = captured_content | trim | length > 0 -%}
344
 
345
+ {#- Forward-scan: find next non-tool message role for continuation detection -#}
346
+ {%- set next_nt = namespace(role=None, found=false) -%}
347
+ {%- for j in range(loop.index0 + 1, loop_messages | length) -%}
348
+ {%- if not next_nt.found -%}
349
+ {%- if loop_messages[j]['role'] != 'tool' -%}
350
+ {%- set next_nt.role = loop_messages[j]['role'] -%}
351
+ {%- set next_nt.found = true -%}
352
+ {%- endif -%}
353
+ {%- endif -%}
354
+ {%- endfor -%}
355
+
356
+ {%- set continues_into_next = (
357
+ role == 'model'
358
+ and next_nt.role == 'assistant'
359
+ and not message.get('tool_calls')
360
+ and not ns_tr_out.flag
361
+ ) -%}
362
+
363
  {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
364
  {{- '<|tool_response>' -}}
365
+ {%- elif continues_into_next -%}
366
+ {{- '\n' -}}
367
  {%- elif not (ns_tr_out.flag and not has_content) -%}
368
  {{- '<turn|>\n' -}}
369
  {%- endif -%}
370
+
371
+ {#- Track previous non-tool role for next iteration (avoids O(n) backward scan) -#}
372
+ {%- set ns.prev_non_tool_role = message['role'] -%}
373
  {%- endif -%}
374
  {%- endfor -%}
375
 
376
  {%- if add_generation_prompt -%}
377
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
378
  {{- '<|turn>model\n' -}}
379
+ {%- endif -%}
380
+
381
+ {%- if not enable_thinking -%}
382
+ {#- Suppress thinking by emitting an empty thought channel — but not while awaiting tool responses -#}
383
+ {%- if ns.prev_message_type != 'tool_call' -%}
384
  {{- '<|channel>thought\n<channel|>' -}}
385
  {%- endif -%}
386
  {%- endif -%}
387
+ {%- endif -%}