kmfoda committed on
Commit
abf7537
·
verified ·
1 Parent(s): f6a2c49

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +21 -0
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|bos|>",
5
+ "eos_token": "<|eos|>",
6
+ "errors": "replace",
7
+ "extra_special_tokens": [
8
+ "<|user_start|>",
9
+ "<|user_end|>",
10
+ "<|assistant_start|>",
11
+ "<|assistant_end|>",
12
+ "<|system_start|>",
13
+ "<|system_end|>"
14
+ ],
15
+ "is_local": false,
16
+ "model_max_length": 1024,
17
+ "pad_token": null,
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>",
20
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}<|system_start|>{{ message['content'] }}<|system_end|>{% elif message['role'] == 'user' %}<|user_start|>{{ message['content'] }}<|user_end|>{% elif message['role'] == 'assistant' %}<|assistant_start|>{{ message['content'] }}<|assistant_end|>{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant_start|>{% endif %}"
21
+ }