Embed chat_template in tokenizer_config.json
#8
by piero-atelico - opened
- tokenizer_config.json +3 -2
tokenizer_config.json
CHANGED
|
@@ -70,5 +70,6 @@
|
|
| 70 |
"str_token": "<|tool_response>",
|
| 71 |
"think_token": "<|think|>",
|
| 72 |
"tokenizer_class": "GemmaTokenizer",
|
| 73 |
-
"unk_token": "<unk>"
|
| 74 |
-
}
|
|
|
|
|
|
| 70 |
"str_token": "<|tool_response>",
|
| 71 |
"think_token": "<|think|>",
|
| 72 |
"tokenizer_class": "GemmaTokenizer",
|
| 73 |
+
"unk_token": "<unk>",
|
| 74 |
+
"chat_template": "{%- macro format_parameters(properties, required) -%}\n {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in properties | dictsort -%}\n {%- set add_comma = false -%}\n {%- if key not in standard_keys -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {{ key }}:{\n {%- if value['description'] -%}\n description:<|\"|>{{ value['description'] }}<|\"|>\n {%- set add_comma = true -%}\n {%- endif -%}\n {%- if value['nullable'] %}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n nullable:true\n {%- endif -%}\n {%- if value['type'] | upper == 'STRING' -%}\n {%- if value['enum'] -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n enum:{{ format_argument(value['enum']) }}\n {%- endif -%}\n {%- elif value['type'] | upper == 'OBJECT' -%}\n ,properties:{\n {%- if value['properties'] is defined and value['properties'] is mapping -%}\n {{- format_parameters(value['properties'], value['required'] | default([])) -}}\n {%- elif value is mapping -%}\n {{- format_parameters(value, value['required'] | default([])) -}}\n {%- endif -%}\n }\n {%- if value['required'] -%}\n ,required:[\n {%- for item in value['required'] | default([]) -%}\n <|\"|>{{- item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- endif -%}\n {%- elif value['type'] | upper == 'ARRAY' -%}\n {%- if value['items'] is mapping and value['items'] -%}\n ,items:{\n {%- set ns_items = namespace(found_first=false) -%}\n {%- for item_key, item_value in value['items'] | dictsort -%}\n {%- if item_value is not none -%}\n {%- if ns_items.found_first %},{% endif -%}\n {%- set ns_items.found_first = true -%}\n {%- if item_key == 'properties' -%}\n properties:{\n {%- if item_value is mapping -%}\n {{- format_parameters(item_value, value['items']['required'] | default([])) -}}\n {%- endif -%}\n }\n {%- elif item_key == 'required' -%}\n required:[\n {%- for req_item in item_value -%}\n <|\"|>{{- req_item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- elif item_key == 'type' -%}\n {%- if item_value is string -%}\n type:{{ format_argument(item_value | upper) }}\n {%- else -%}\n type:{{ format_argument(item_value | map('upper') | list) }}\n {%- endif -%}\n {%- else -%}\n {{ item_key }}:{{ format_argument(item_value) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n }\n {%- endif -%}\n {%- endif -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n type:<|\"|>{{ value['type'] | upper }}<|\"|>}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n{%- macro format_function_declaration(tool_data) -%}\n declaration:{{- tool_data['function']['name'] -}}{description:<|\"|>{{- tool_data['function']['description'] -}}<|\"|>\n {%- set params = tool_data['function']['parameters'] -%}\n {%- if params -%}\n ,parameters:{\n {%- if params['properties'] -%}\n properties:{ {{- format_parameters(params['properties'], params['required']) -}} },\n {%- endif -%}\n {%- if params['required'] -%}\n required:[\n {%- for item in params['required'] -%}\n <|\"|>{{- item -}}<|\"|>\n {{- ',' if not loop.last -}}\n {%- endfor -%}\n ],\n {%- endif -%}\n {%- if params['type'] -%}\n type:<|\"|>{{- params['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n {%- if 'response' in tool_data['function'] -%}\n {%- set response_declaration = tool_data['function']['response'] -%}\n ,response:{\n {%- if response_declaration['description'] -%}\n description:<|\"|>{{- response_declaration['description'] -}}<|\"|>,\n {%- endif -%}\n {%- if response_declaration['type'] | upper == 'OBJECT' -%}\n type:<|\"|>{{- response_declaration['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n }\n{%- endmacro -%}\n{%- macro format_argument(argument, escape_keys=True) -%}\n {%- if argument is string -%}\n {{- '<|\"|>' + argument + '<|\"|>' -}}\n {%- elif argument is boolean -%}\n {{- 'true' if argument else 'false' -}}\n {%- elif argument is mapping -%}\n {{- '{' -}}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in argument | dictsort -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {%- if escape_keys -%}\n {{- '<|\"|>' + key + '<|\"|>' -}}\n {%- else -%}\n {{- key -}}\n {%- endif -%}\n :{{- format_argument(value, escape_keys=escape_keys) -}}\n {%- endfor -%}\n {{- '}' -}}\n {%- elif argument is sequence -%}\n {{- '[' -}}\n {%- for item in argument -%}\n {{- format_argument(item, escape_keys=escape_keys) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- ']' -}}\n {%- else -%}\n {{- argument -}}\n {%- endif -%}\n{%- endmacro -%}\n{%- macro strip_thinking(text) -%}\n {%- set ns = namespace(result='') -%}\n {%- for part in text.split('<channel|>') -%}\n {%- if '<|channel>' in part -%}\n {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}\n {%- else -%}\n {%- set ns.result = ns.result + part -%}\n {%- endif -%}\n {%- endfor -%}\n {{- ns.result | trim -}}\n{%- endmacro -%}\n\n{%- set ns = namespace(prev_message_type=None) -%}\n{%- set loop_messages = messages -%}\n{{ bos_token }}\n{#- Handle System/Tool Definitions Block -#}\n{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}\n {{- '<|turn>system\\n' -}}\n\n {#- Inject Thinking token at the very top of the FIRST system turn -#}\n {%- if enable_thinking is defined and enable_thinking -%}\n {{- '<|think|>' -}}\n {%- set ns.prev_message_type = 'think' -%}\n {%- endif -%}\n\n {%- if messages[0]['role'] in ['system', 'developer'] -%}\n {{- messages[0]['content'] | trim -}}\n {%- set loop_messages = messages[1:] -%}\n {%- endif -%}\n\n {%- if tools -%}\n {%- for tool in tools %}\n {{- '<|tool>' -}}\n {{- format_function_declaration(tool) | trim -}}\n {{- '<tool|>' -}}\n {%- endfor %}\n {%- set ns.prev_message_type = 'tool' -%}\n {%- endif -%}\n\n {{- '<turn|>\\n' -}}\n{%- endif %}\n\n{#- Loop through messages -#}\n{%- for message in loop_messages -%}\n {%- set ns.prev_message_type = None -%}\n {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}\n {{- '<|turn>' + role + '\\n' }}\n\n {%- if message['tool_calls'] -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- set function = tool_call['function'] -%}\n {{- '<|tool_call>call:' + function['name'] + '{' -}}\n {%- if function['arguments'] is mapping -%}\n {%- set ns_args = namespace(found_first=false) -%}\n {%- for key, value in function['arguments'] | dictsort -%}\n {%- if ns_args.found_first %},{% endif -%}\n {%- set ns_args.found_first = true -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- endfor -%}\n {%- elif function['arguments'] is string -%}\n {{- function['arguments'] -}}\n {%- endif -%}\n {{- '}<tool_call|>' -}}\n {%- endfor -%}\n {%- set ns.prev_message_type = 'tool_call' -%}\n {%- endif -%}\n\n {%- if message['tool_responses'] -%}\n {#- Tool Response handling -#}\n {%- for tool_response in message['tool_responses'] -%}\n {{- '<|tool_response>' -}}\n {%- if tool_response['response'] is mapping -%}\n {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}\n {%- for key, value in tool_response['response'] | dictsort -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- '}' -}}\n {%- else -%}\n {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}\n {%- endif -%}\n {{- '<tool_response|>' -}}\n {%- endfor -%}\n {%- set ns.prev_message_type = 'tool_response' -%}\n {%- endif -%}\n\n {%- if message['content'] is string -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(message['content']) -}}\n {%- else -%}\n {{- message['content'] | trim -}}\n {%- endif -%}\n {%- elif message['content'] is sequence -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'text' -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(item['text']) -}}\n {%- else -%}\n {{- item['text'] | trim -}}\n {%- endif -%}\n {%- elif item['type'] == 'image' -%}\n {{- '\\n\\n<|image|>\\n\\n' -}}\n {%- set ns.prev_message_type = 'image' -%}\n {%- elif item['type'] == 'audio' -%}\n {{- '<|audio|>' -}}\n {%- set ns.prev_message_type = 'audio' -%}\n {%- elif item['type'] == 'video' -%}\n {{- '\\n\\n<|video|>\\n\\n' -}}\n {%- set ns.prev_message_type = 'video' -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n\n {%- if not (message['tool_responses'] and not message['content']) -%}\n {{- '<turn|>\\n' -}}\n {%- endif -%}\n{%- endfor -%}\n\n{%- if add_generation_prompt -%}\n {%- if ns.prev_message_type != 'tool_response' -%}\n {{- '<|turn>model\\n' -}}\n {%- endif -%}\n{%- endif -%}"
|
| 75 |
+
}
|