rajammanabrolu committed on
Commit
a755aa7
·
1 Parent(s): 2a7951f

Update tiktoken.py

Browse files
Files changed (1) hide show
  1. tiktoken.py +37 -13
tiktoken.py CHANGED
@@ -172,26 +172,50 @@ class TiktokenTokenizerWrapper(PreTrainedTokenizer):
172
 
173
  Pinning default Chat ML template in case defaults change.
174
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  template = (
176
- "{% set loop_messages = messages %}"
177
- "{% set system_message = '' %}"
178
  "{% if messages[0]['role'] == 'system' %}"
179
- "{% set loop_messages = messages[1:] %}"
180
  "{% set system_message = messages[0]['content'] %}"
181
- "{{'<|im_start|>system\n' + system_message}}"
182
- "{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role']%}"
 
 
183
  "{% set loop_messages = messages %}"
184
- "{% set system_message = 'DEFAULT_SYSTEM_PROMPT' %}"
185
- '{% endif %}'
186
- "{% if not (system_message == '') %}"
187
- "{{'<|im_start|>system\n' + system_message}}"
188
- '{% endif %}'
189
- '{% for message in loop_messages %}'
 
 
 
190
  "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
 
191
  '{% if (add_generation_prompt == true) %}'
192
  "{{'<|im_start|>' + 'assistant' + '\n'}}"
193
- '{% endif %}'
194
- '{% endfor %}')
195
  template = template.replace(
196
  'USE_DEFAULT_PROMPT',
197
  'true' if self.use_default_system_prompt else 'false')
 
172
 
173
  Pinning default Chat ML template in case defaults change.
174
  """
175
+ # template = (
176
+ # "{% set loop_messages = messages %}"
177
+ # "{% set system_message = '' %}"
178
+ # "{% if messages[0]['role'] == 'system' %}"
179
+ # "{% set loop_messages = messages[1:] %}"
180
+ # "{% set system_message = messages[0]['content'] %}"
181
+ # "{{'<|im_start|>system\n' + system_message}}"
182
+ # "{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role']%}"
183
+ # "{% set loop_messages = messages %}"
184
+ # "{% set system_message = 'DEFAULT_SYSTEM_PROMPT' %}"
185
+ # '{% endif %}'
186
+ # "{% if not (system_message == '') %}"
187
+ # "{{'<|im_start|>system\n' + system_message}}"
188
+ # '{% endif %}'
189
+ # '{% for message in loop_messages %}'
190
+ # "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
191
+ # '{% if (add_generation_prompt == true) %}'
192
+ # "{{'<|im_start|>' + 'assistant' + '\n'}}"
193
+ # '{% endif %}'
194
+ # '{% endfor %}')
195
  template = (
 
 
196
  "{% if messages[0]['role'] == 'system' %}"
197
+ "{% set loop_messages = messages[1:] %}" # Extract system message if it's present
198
  "{% set system_message = messages[0]['content'] %}"
199
+ "{% elif USE_DEFAULT_PROMPT == true and not 'system' in messages[0]['role'] %}"
200
+ "{% set loop_messages = messages %}" # Or use the default system message if the flag is set
201
+ "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
202
+ "{% else %}"
203
  "{% set loop_messages = messages %}"
204
+ "{% set system_message = false %}"
205
+ "{% endif %}"
206
+ "{% for message in loop_messages %}" # Loop over all non-system messages
207
+ "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
208
+ "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
209
+ "{% endif %}"
210
+ "{% if loop.index0 == 0 and system_message != false %}" # Embed system message in first message
211
+ "{{ '<|im_start|>system\n' + system_message.strip() + '\n'}}"
212
+ "{% else %}"
213
  "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
214
+ "{% endif %}"
215
  '{% if (add_generation_prompt == true) %}'
216
  "{{'<|im_start|>' + 'assistant' + '\n'}}"
217
+ "{% else %}"
218
+ "{{ eos_token }}"
219
  template = template.replace(
220
  'USE_DEFAULT_PROMPT',
221
  'true' if self.use_default_system_prompt else 'false')