Upload 5 files
Browse files
- .gitattributes +1 -0
- chat_template.jinja +74 -13
- tekken.json +3 -0
- tokenizer_config.json +1 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tekken.json filter=lfs diff=lfs merge=lfs -text
|
chat_template.jinja
CHANGED
|
@@ -1,30 +1,66 @@
|
|
| 1 |
-
{%- set
|
| 2 |
-
|
| 3 |
-
|
| 4 |
{{- bos_token }}
|
| 5 |
-
|
|
|
|
| 6 |
{%- if messages[0]['role'] == 'system' %}
|
| 7 |
{%- if messages[0]['content'] is string %}
|
| 8 |
-
{%- set
|
| 9 |
{%- else %}
|
| 10 |
-
{%- set
|
| 11 |
{%- endif %}
|
| 12 |
{%- set loop_messages = messages[1:] %}
|
| 13 |
{%- else %}
|
| 14 |
-
{%- set
|
| 15 |
{%- set loop_messages = messages %}
|
| 16 |
{%- endif %}
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
{%- for message in loop_messages %}
|
| 20 |
{%- if message['role'] == 'user' %}
|
|
|
|
| 21 |
{%- if message['content'] is string %}
|
| 22 |
{{- '[INST]' + message['content'] + '[/INST]' }}
|
| 23 |
{%- else %}
|
| 24 |
{{- '[INST]' }}
|
| 25 |
{%- for block in message['content'] %}
|
| 26 |
{%- if block['type'] == 'text' %}
|
| 27 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
{%- elif block['type'] in ['image', 'image_url'] %}
|
| 29 |
{{- '[IMG]' }}
|
| 30 |
{%- else %}
|
|
@@ -33,19 +69,44 @@
|
|
| 33 |
{%- endfor %}
|
| 34 |
{{- '[/INST]' }}
|
| 35 |
{%- endif %}
|
|
|
|
| 36 |
{%- elif message['role'] == 'system' %}
|
| 37 |
{%- if message['content'] is string %}
|
| 38 |
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
| 39 |
{%- else %}
|
| 40 |
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
| 41 |
{%- endif %}
|
|
|
|
| 42 |
{%- elif message['role'] == 'assistant' %}
|
| 43 |
{%- if message['content'] is string %}
|
| 44 |
-
{{- message['content']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
{%- else %}
|
| 46 |
-
{
|
| 47 |
{%- endif %}
|
|
|
|
|
|
|
| 48 |
{%- else %}
|
| 49 |
-
{{- raise_exception('Only user, system and
|
| 50 |
{%- endif %}
|
| 51 |
{%- endfor %}
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set default_system_message = 'First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.\n\nYour thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.' %}
|
| 2 |
+
|
|
|
|
| 3 |
{{- bos_token }}
|
| 4 |
+
|
| 5 |
+
{#- Extract system message if present -#}
|
| 6 |
{%- if messages[0]['role'] == 'system' %}
|
| 7 |
{%- if messages[0]['content'] is string %}
|
| 8 |
+
{%- set raw_system_message = messages[0]['content'] %}
|
| 9 |
{%- else %}
|
| 10 |
+
{%- set raw_system_message = messages[0]['content'][0]['text'] %}
|
| 11 |
{%- endif %}
|
| 12 |
{%- set loop_messages = messages[1:] %}
|
| 13 |
{%- else %}
|
| 14 |
+
{%- set raw_system_message = "" %}
|
| 15 |
{%- set loop_messages = messages %}
|
| 16 |
{%- endif %}
|
| 17 |
+
|
| 18 |
+
{#- Detect THINK flag: enabled when the system message contains the substring "/think" anywhere -#}
|
| 19 |
+
{%- if "/think" in raw_system_message %}
|
| 20 |
+
{%- set THINK = True %}
|
| 21 |
+
{%- else %}
|
| 22 |
+
{%- set THINK = False %}
|
| 23 |
+
{%- endif %}
|
| 24 |
+
|
| 25 |
+
{#- Apply logic depending on THINK flag -#}
|
| 26 |
+
{%- if THINK %}
|
| 27 |
+
{%- if raw_system_message|length > 0 %}
|
| 28 |
+
{%- set system_message = default_system_message + "\n\n" + raw_system_message %}
|
| 29 |
+
{%- else %}
|
| 30 |
+
{%- set system_message = default_system_message %}
|
| 31 |
+
{%- endif %}
|
| 32 |
+
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
| 33 |
+
{%- else %}
|
| 34 |
+
{%- if raw_system_message|length > 0 %}
|
| 35 |
+
{{- '[SYSTEM_PROMPT]' + raw_system_message + '[/SYSTEM_PROMPT]' }}
|
| 36 |
+
{%- endif %}
|
| 37 |
+
{%- endif %}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
{#- Tool descriptions are emitted once, before the message loop (after any leading system prompt), not attached to a user message. Edits made by Unsloth #}
|
| 41 |
+
{%- set tools_description = "" %}
|
| 42 |
+
{%- set has_tools = false %}
|
| 43 |
+
|
| 44 |
+
{%- if tools is defined and tools is not none and tools|length > 0 %}
|
| 45 |
+
{%- set has_tools = true %}
|
| 46 |
+
{%- set tools_description = "[AVAILABLE_TOOLS]" + (tools | tojson) + "[/AVAILABLE_TOOLS]" %}
|
| 47 |
+
{{- tools_description }}
|
| 48 |
+
{%- endif %}
|
| 49 |
+
|
| 50 |
{%- for message in loop_messages %}
|
| 51 |
{%- if message['role'] == 'user' %}
|
| 52 |
+
|
| 53 |
{%- if message['content'] is string %}
|
| 54 |
{{- '[INST]' + message['content'] + '[/INST]' }}
|
| 55 |
{%- else %}
|
| 56 |
{{- '[INST]' }}
|
| 57 |
{%- for block in message['content'] %}
|
| 58 |
{%- if block['type'] == 'text' %}
|
| 59 |
+
{%- if block['text'] is defined %}
|
| 60 |
+
{{- block['text'] }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{{- block['content'] }}
|
| 63 |
+
{%- endif %}
|
| 64 |
{%- elif block['type'] in ['image', 'image_url'] %}
|
| 65 |
{{- '[IMG]' }}
|
| 66 |
{%- else %}
|
|
|
|
| 69 |
{%- endfor %}
|
| 70 |
{{- '[/INST]' }}
|
| 71 |
{%- endif %}
|
| 72 |
+
|
| 73 |
{%- elif message['role'] == 'system' %}
|
| 74 |
{%- if message['content'] is string %}
|
| 75 |
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
| 76 |
{%- else %}
|
| 77 |
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
| 78 |
{%- endif %}
|
| 79 |
+
|
| 80 |
{%- elif message['role'] == 'assistant' %}
|
| 81 |
{%- if message['content'] is string %}
|
| 82 |
+
{{- message['content'] }}
|
| 83 |
+
{%- elif message['content'] is iterable %}
|
| 84 |
+
{{- message['content'][0]['text'] }}
|
| 85 |
+
{%- endif %}
|
| 86 |
+
|
| 87 |
+
{%- if message['tool_calls'] is defined and message['tool_calls'] is not none %}
|
| 88 |
+
{%- for tool in message['tool_calls'] %}
|
| 89 |
+
{%- set arguments = tool['function']['arguments'] %}
|
| 90 |
+
{%- if arguments is not string %}
|
| 91 |
+
{%- set arguments = arguments|tojson %}
|
| 92 |
+
{%- endif %}
|
| 93 |
+
{{- "[TOOL_CALLS]" + tool['function']['name'] + "[ARGS]" + arguments }}
|
| 94 |
+
{%- endfor %}
|
| 95 |
+
{%- endif %}
|
| 96 |
+
|
| 97 |
+
{{- eos_token }}
|
| 98 |
+
|
| 99 |
+
{%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
|
| 100 |
+
{%- if message.content is defined and message.content.content is defined %}
|
| 101 |
+
{%- set content = message.content.content %}
|
| 102 |
{%- else %}
|
| 103 |
+
{%- set content = message.content %}
|
| 104 |
{%- endif %}
|
| 105 |
+
{{- "[TOOL_RESULTS]" + content|string + "[/TOOL_RESULTS]" }}
|
| 106 |
+
|
| 107 |
{%- else %}
|
| 108 |
+
{{- raise_exception('Only user, system, assistant and tool roles are supported!') }}
|
| 109 |
{%- endif %}
|
| 110 |
{%- endfor %}
|
| 111 |
+
|
| 112 |
+
{#- Licensed under the Apache License, Version 2.0 (the "License") #}
|
tekken.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e051a32ae9d6333459da0083d36507116334fc85378321cfb32dfff7187531
|
| 3 |
+
size 19399765
|
tokenizer_config.json
CHANGED
|
@@ -9014,6 +9014,7 @@
|
|
| 9014 |
"model_max_length": 131072,
|
| 9015 |
"pad_token": "<pad>",
|
| 9016 |
"padding_side": "left",
|
|
|
|
| 9017 |
"processor_class": "PixtralProcessor",
|
| 9018 |
"tokenizer_class": "LlamaTokenizerFast",
|
| 9019 |
"unk_token": "<unk>",
|
|
|
|
| 9014 |
"model_max_length": 131072,
|
| 9015 |
"pad_token": "<pad>",
|
| 9016 |
"padding_side": "left",
|
| 9017 |
+
"chat_template": "{%- set default_system_message = 'First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.\\n\\nYour thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.' %}\n \n{{- bos_token }}\n \n{#- Extract system message if present -#}\n{%- if messages[0]['role'] == 'system' %}\n {%- if messages[0]['content'] is string %}\n {%- set raw_system_message = messages[0]['content'] %}\n {%- else %}\n {%- set raw_system_message = messages[0]['content'][0]['text'] %}\n {%- endif %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set raw_system_message = \"\" %}\n {%- set loop_messages = messages %}\n{%- endif %}\n \n{#- Detect THINK flag by searching for exact phrase \"/think\" -#}\n{%- if \"/think\" in raw_system_message %}\n {%- set THINK = True %}\n{%- else %}\n {%- set THINK = False %}\n{%- endif %}\n \n{#- Apply logic depending on THINK flag -#}\n{%- if THINK %}\n {%- if raw_system_message|length > 0 %}\n {%- set system_message = default_system_message + \"\\n\\n\" + raw_system_message %}\n {%- else %}\n {%- set system_message = default_system_message %}\n {%- endif %}\n {{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n{%- else %}\n {%- if raw_system_message|length > 0 %}\n {{- '[SYSTEM_PROMPT]' + raw_system_message + '[/SYSTEM_PROMPT]' }}\n {%- endif %}\n{%- endif %}\n \n \n{#- Tool description appended ONLY to last user message. 
Edits made by Unsloth #}\n{%- set tools_description = \"\" %}\n{%- set has_tools = false %}\n \n{%- if tools is defined and tools is not none and tools|length > 0 %}\n {%- set has_tools = true %}\n {%- set tools_description = \"[AVAILABLE_TOOLS]\" + (tools | tojson) + \"[/AVAILABLE_TOOLS]\" %}\n {{- tools_description }}\n{%- endif %}\n \n{%- for message in loop_messages %}\n {%- if message['role'] == 'user' %}\n \n {%- if message['content'] is string %}\n {{- '[INST]' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST]' }}\n {%- for block in message['content'] %}\n {%- if block['type'] == 'text' %}\n {%- if block['text'] is defined %}\n {{- block['text'] }}\n {%- else %}\n {{- block['content'] }}\n {%- endif %}\n {%- elif block['type'] in ['image', 'image_url'] %}\n {{- '[IMG]' }}\n {%- else %}\n {{- raise_exception('Only text and image blocks are supported in message content!') }}\n {%- endif %}\n {%- endfor %}\n {{- '[/INST]' }}\n {%- endif %}\n \n {%- elif message['role'] == 'system' %}\n {%- if message['content'] is string %}\n {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n {%- else %}\n {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}\n {%- endif %}\n \n {%- elif message['role'] == 'assistant' %}\n {%- if message['content'] is string %}\n {{- message['content'] }}\n {%- elif message['content'] is iterable %}\n {{- message['content'][0]['text'] }}\n {%- endif %}\n \n {%- if message['tool_calls'] is defined and message['tool_calls'] is not none %}\n {%- for tool in message['tool_calls'] %}\n {%- set arguments = tool['function']['arguments'] %}\n {%- if arguments is not string %}\n {%- set arguments = arguments|tojson %}\n {%- endif %}\n {{- \"[TOOL_CALLS]\" + tool['function']['name'] + \"[ARGS]\" + arguments }}\n {%- endfor %}\n {%- endif %}\n \n {{- eos_token }}\n \n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and 
message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- \"[TOOL_RESULTS]\" + content|string + \"[/TOOL_RESULTS]\" }}\n \n {%- else %}\n {{- raise_exception('Only user, system, assistant and tool roles are supported!') }}\n {%- endif %}\n{%- endfor %}\n \n{#- Licensed under the Apache License, Version 2.0 (the \\\"License\\\") #}\n",
|
| 9018 |
"processor_class": "PixtralProcessor",
|
| 9019 |
"tokenizer_class": "LlamaTokenizerFast",
|
| 9020 |
"unk_token": "<unk>",
|