fix-apply-chat-template
#59
by
bigmoyan
- opened
- chat_template.jinja +21 -13
- tokenization_kimi.py +9 -2
chat_template.jinja
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{%- if tools -%}
|
| 2 |
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
|
| 3 |
{%- endif -%}
|
|
@@ -16,26 +32,18 @@
|
|
| 16 |
{%- endif -%}
|
| 17 |
|
| 18 |
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
| 19 |
-
{
|
| 20 |
-
<|tool_calls_section_begin|>
|
| 21 |
{%- for tool_call in message['tool_calls'] -%}
|
| 22 |
{%- set formatted_id = tool_call['id'] -%}
|
| 23 |
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
|
| 24 |
{%- endfor -%}
|
| 25 |
<|tool_calls_section_end|>
|
| 26 |
{%- elif message['role'] == 'tool' -%}
|
| 27 |
-
|
| 28 |
-
{{
|
| 29 |
-
|
| 30 |
-
{{ message['content'] }}
|
| 31 |
{%- elif message['content'] is not none -%}
|
| 32 |
-
{
|
| 33 |
-
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
|
| 34 |
-
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
|
| 35 |
-
{% else -%}
|
| 36 |
-
{{ content['text'] }}
|
| 37 |
-
{%- endif -%}
|
| 38 |
-
{%- endfor -%}
|
| 39 |
{%- endif -%}
|
| 40 |
<|im_end|>
|
| 41 |
{%- endfor -%}
|
|
|
|
| 1 |
+
{#- render_content(msg): emit msg['content'] as template output.
    - string content: emitted verbatim
    - list content: each item with an image marker ('type' == 'image',
      or an 'image'/'image_url' key) renders the media placeholder
      tokens; any other item emits its 'text' value
    - missing/None content: emits nothing
    NOTE(review): items without 'type'/'image'/'image_url' are assumed
    to carry a 'text' key — confirm against callers. -#}
{% macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro %}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
{%- if tools -%}
|
| 18 |
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
|
| 19 |
{%- endif -%}
|
|
|
|
| 32 |
{%- endif -%}
|
| 33 |
|
| 34 |
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
| 35 |
+
{{render_content(message)}}<|tool_calls_section_begin|>
|
|
|
|
| 36 |
{%- for tool_call in message['tool_calls'] -%}
|
| 37 |
{%- set formatted_id = tool_call['id'] -%}
|
| 38 |
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
|
| 39 |
{%- endfor -%}
|
| 40 |
<|tool_calls_section_end|>
|
| 41 |
{%- elif message['role'] == 'tool' -%}
|
| 42 |
+
{%- set tool_call_id = message.tool_call_id if message.tool_call_id.startswith('functions.') else 'functions.' + message.tool_call_id -%}
|
| 43 |
+
## Return of {{ tool_call_id }}
|
| 44 |
+
{{render_content(message)}}
|
|
|
|
| 45 |
{%- elif message['content'] is not none -%}
|
| 46 |
+
{{render_content(message)}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
{%- endif -%}
|
| 48 |
<|im_end|>
|
| 49 |
{%- endfor -%}
|
tokenization_kimi.py
CHANGED
|
@@ -326,10 +326,17 @@ class TikTokenTokenizer(PreTrainedTokenizer):
|
|
| 326 |
|
| 327 |
|
| 328 |
def apply_chat_template(
|
| 329 |
-
self, conversation, tools: Optional[list[dict]] = None,
|
|
|
|
|
|
|
|
|
|
| 330 |
):
|
| 331 |
tools = deep_sort_dict(tools)
|
| 332 |
-
return super().apply_chat_template(conversation,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
|
| 335 |
def deep_sort_dict(obj: Any) -> Any:
|
|
|
|
| 326 |
|
| 327 |
|
| 328 |
def apply_chat_template(
    self, conversation, tools: Optional[list[dict]] = None,
    tokenize: bool = False,
    add_generation_prompt: bool = True,
    **kwargs,
):
    """Render *conversation* through the chat template.

    Overrides the base implementation to (a) recursively key-sort
    ``tools`` so the JSON the template emits via ``tojson`` is
    deterministic across calls, and (b) default ``tokenize`` to
    ``False`` and ``add_generation_prompt`` to ``True``.

    Args:
        conversation: Messages in whatever form the base
            ``apply_chat_template`` accepts.
        tools: Optional tool/function declarations; passed through
            ``deep_sort_dict`` before templating.
        tokenize: When ``True`` return token ids, otherwise the
            rendered string.
        add_generation_prompt: When ``True`` append the prompt that
            cues the assistant's next turn.
        **kwargs: Forwarded unchanged to the base implementation.

    Returns:
        Whatever the base ``apply_chat_template`` returns (string or
        token ids, depending on ``tokenize``).
    """
    # Stable key order => byte-identical tool JSON for identical inputs.
    tools = deep_sort_dict(tools)
    return super().apply_chat_template(
        conversation,
        tools=tools,
        tokenize=tokenize,
        add_generation_prompt=add_generation_prompt,
        **kwargs,
    )
|
| 340 |
|
| 341 |
|
| 342 |
def deep_sort_dict(obj: Any) -> Any:
|