Commit
·
a755aa7
1
Parent(s):
2a7951f
Update tiktoken.py
Browse files- tiktoken.py +37 -13
tiktoken.py
CHANGED
|
@@ -172,26 +172,50 @@ class TiktokenTokenizerWrapper(PreTrainedTokenizer):
|
|
| 172 |
|
| 173 |
Pinning default Chat ML template in case defaults change.
|
| 174 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
template = (
|
| 176 |
-
"{% set loop_messages = messages %}"
|
| 177 |
-
"{% set system_message = '' %}"
|
| 178 |
"{% if messages[0]['role'] == 'system' %}"
|
| 179 |
-
"{% set loop_messages = messages[1:] %}"
|
| 180 |
"{% set system_message = messages[0]['content'] %}"
|
| 181 |
-
"{{'<|im_start|>system\n' + system_message}}"
|
| 182 |
-
"{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role']%}"
|
|
|
|
|
|
|
| 183 |
"{% set loop_messages = messages %}"
|
| 184 |
-
"{% set system_message = 'DEFAULT_SYSTEM_PROMPT' %}"
|
| 185 |
-
'{% endif %}'
|
| 186 |
-
"{% if not (system_message == '') %}"
|
| 187 |
-
"{{'<|im_start|>system\n' + system_message}}"
|
| 188 |
-
'{% endif %}'
|
| 189 |
-
'{% for message in loop_messages %}'
|
|
|
|
|
|
|
|
|
|
| 190 |
"{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
|
|
|
|
| 191 |
'{% if (add_generation_prompt == true) %}'
|
| 192 |
"{{'<|im_start|>' + 'assistant' + '\n'}}"
|
| 193 |
-
'{% endif %}'
|
| 194 |
-
'{% endfor %}')
|
| 195 |
template = template.replace(
|
| 196 |
'USE_DEFAULT_PROMPT',
|
| 197 |
'true' if self.use_default_system_prompt else 'false')
|
|
|
|
| 172 |
|
| 173 |
Pinning default Chat ML template in case defaults change.
|
| 174 |
"""
|
| 175 |
+
# template = (
|
| 176 |
+
# "{% set loop_messages = messages %}"
|
| 177 |
+
# "{% set system_message = '' %}"
|
| 178 |
+
# "{% if messages[0]['role'] == 'system' %}"
|
| 179 |
+
# "{% set loop_messages = messages[1:] %}"
|
| 180 |
+
# "{% set system_message = messages[0]['content'] %}"
|
| 181 |
+
# "{{'<|im_start|>system\n' + system_message}}"
|
| 182 |
+
# "{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role']%}"
|
| 183 |
+
# "{% set loop_messages = messages %}"
|
| 184 |
+
# "{% set system_message = 'DEFAULT_SYSTEM_PROMPT' %}"
|
| 185 |
+
# '{% endif %}'
|
| 186 |
+
# "{% if not (system_message == '') %}"
|
| 187 |
+
# "{{'<|im_start|>system\n' + system_message}}"
|
| 188 |
+
# '{% endif %}'
|
| 189 |
+
# '{% for message in loop_messages %}'
|
| 190 |
+
# "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
|
| 191 |
+
# '{% if (add_generation_prompt == true) %}'
|
| 192 |
+
# "{{'<|im_start|>' + 'assistant' + '\n'}}"
|
| 193 |
+
# '{% endif %}'
|
| 194 |
+
# '{% endfor %}')
|
| 195 |
template = (
|
|
|
|
|
|
|
| 196 |
"{% if messages[0]['role'] == 'system' %}"
|
| 197 |
+
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
|
| 198 |
"{% set system_message = messages[0]['content'] %}"
|
| 199 |
+
"{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role'] %}"
|
| 200 |
+
"{% set loop_messages = messages %}" # Or use the default system message if the flag is set
|
| 201 |
+
"{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
|
| 202 |
+
"{% else %}"
|
| 203 |
"{% set loop_messages = messages %}"
|
| 204 |
+
"{% set system_message = false %}"
|
| 205 |
+
"{% endif %}"
|
| 206 |
+
"{% for message in loop_messages %}" # Loop over all non-system messages
|
| 207 |
+
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
|
| 208 |
+
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
|
| 209 |
+
"{% endif %}"
|
| 210 |
+
"{% if loop.index0 == 0 and system_message != false %}" # Embed system message in first message
|
| 211 |
+
"{{ '<|im_start|>system\n' + system_message.strip() + '\n'}}"
|
| 212 |
+
"{% else %}"
|
| 213 |
"{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
|
| 214 |
+
"{% endif %}"
|
| 215 |
'{% if (add_generation_prompt == true) %}'
|
| 216 |
"{{'<|im_start|>' + 'assistant' + '\n'}}"
|
| 217 |
+
"{% else %}"
|
| 218 |
+
"{{ eos_token }}"
|
| 219 |
template = template.replace(
|
| 220 |
'USE_DEFAULT_PROMPT',
|
| 221 |
'true' if self.use_default_system_prompt else 'false')
|