fix-apply-chat-template

#59
by bigmoyan - opened
Files changed (2)
  1. chat_template.jinja +21 -13
  2. tokenization_kimi.py +9 -2
chat_template.jinja CHANGED
@@ -1,3 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {%- if tools -%}
2
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
3
  {%- endif -%}
@@ -16,26 +32,18 @@
16
  {%- endif -%}
17
 
18
  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
19
- {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
20
- <|tool_calls_section_begin|>
21
  {%- for tool_call in message['tool_calls'] -%}
22
  {%- set formatted_id = tool_call['id'] -%}
23
  <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
24
  {%- endfor -%}
25
  <|tool_calls_section_end|>
26
  {%- elif message['role'] == 'tool' -%}
27
- ## Return of {{ message.tool_call_id }}
28
- {{ message['content'] }}
29
- {%- elif message['content'] is string -%}
30
- {{ message['content'] }}
31
  {%- elif message['content'] is not none -%}
32
- {% for content in message['content'] -%}
33
- {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
34
- <|media_start|>image<|media_content|><|media_pad|><|media_end|>
35
- {% else -%}
36
- {{ content['text'] }}
37
- {%- endif -%}
38
- {%- endfor -%}
39
  {%- endif -%}
40
  <|im_end|>
41
  {%- endfor -%}
 
1
+ {% macro render_content(msg) -%}
2
+ {%- set c = msg.get('content') -%}
3
+ {%- if c is string -%}
4
+ {{ c }}
5
+ {%- elif c is not none -%}
6
+ {% for content in c -%}
7
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
8
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
9
+ {% else -%}
10
+ {{ content['text'] }}
11
+ {%- endif -%}
12
+ {%- endfor -%}
13
+ {%- endif -%}
14
+ {%- endmacro %}
15
+
16
+
17
  {%- if tools -%}
18
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
19
  {%- endif -%}
 
32
  {%- endif -%}
33
 
34
  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
35
+ {{render_content(message)}}<|tool_calls_section_begin|>
 
36
  {%- for tool_call in message['tool_calls'] -%}
37
  {%- set formatted_id = tool_call['id'] -%}
38
  <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
39
  {%- endfor -%}
40
  <|tool_calls_section_end|>
41
  {%- elif message['role'] == 'tool' -%}
42
+ {%- set tool_call_id = message.tool_call_id if message.tool_call_id.startswith('functions.') else 'functions.' + message.tool_call_id -%}
43
+ ## Return of {{ tool_call_id }}
44
+ {{render_content(message)}}
 
45
  {%- elif message['content'] is not none -%}
46
+ {{render_content(message)}}
 
 
 
 
 
 
47
  {%- endif -%}
48
  <|im_end|>
49
  {%- endfor -%}
tokenization_kimi.py CHANGED
@@ -326,10 +326,17 @@ class TikTokenTokenizer(PreTrainedTokenizer):
326
 
327
 
328
  def apply_chat_template(
329
- self, conversation, tools: Optional[list[dict]] = None, **kwargs
 
 
 
330
  ):
331
  tools = deep_sort_dict(tools)
332
- return super().apply_chat_template(conversation, tools=tools, **kwargs)
 
 
 
 
333
 
334
 
335
  def deep_sort_dict(obj: Any) -> Any:
 
326
 
327
 
328
  def apply_chat_template(
329
+ self, conversation, tools: Optional[list[dict]] = None,
330
+ tokenize: bool = False,
331
+ add_generation_prompt: bool = True,
332
+ **kwargs
333
  ):
334
  tools = deep_sort_dict(tools)
335
+ return super().apply_chat_template(conversation,
336
+ tools=tools,
337
+ tokenize=tokenize,
338
+ add_generation_prompt=add_generation_prompt,
339
+ **kwargs)
340
 
341
 
342
  def deep_sort_dict(obj: Any) -> Any: