pandora-s committed on
Commit
45c4e39
·
verified ·
1 Parent(s): 83e9aa1

Update Chat Template - V3 Tokenizer

Browse files

Using `mistral-common` as the ground truth, this commit updates the chat template so that its output matches the formatting produced by `MistralTokenizer.v3`.
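The new chat template should format a conversation as follows (note that the template emits a space after every `[/INST]`, including the final one, so the rendered prompt ends with a trailing space):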

```
<s>[INST] Hello[/INST] Hi there!</s>[INST] How are you?[/INST] Fine and you?</s>[INST] Fine thank you.[/INST]
```

For reference, here is the output from `MistralTokenizer.v3`:

```
<s>[INST]▁Hello[/INST]▁Hi▁there!</s>[INST]▁How▁are▁you?[/INST]▁Fine▁and▁you?</s>[INST]▁Fine▁thank▁you.[/INST]
```

Files changed (1) hide show
  1. tokenizer_config.json +1 -1
tokenizer_config.json CHANGED
@@ -6173,7 +6173,7 @@
6173
  }
6174
  },
6175
  "bos_token": "<s>",
6176
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
6177
  "clean_up_tokenization_spaces": false,
6178
  "eos_token": "</s>",
6179
  "legacy": false,
 
6173
  }
6174
  },
6175
  "bos_token": "<s>",
6176
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST] ' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
6177
  "clean_up_tokenization_spaces": false,
6178
  "eos_token": "</s>",
6179
  "legacy": false,