DINGDINGBELLS committed on
Commit
357e238
·
verified ·
1 Parent(s): 32412c1

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +14 -4
tokenizer_config.json CHANGED
@@ -1,9 +1,19 @@
1
  {
 
 
2
  "bos_token": "<|endoftext|>",
 
3
  "eos_token": "<|endoftext|>",
4
- "unk_token": "<|endoftext|>",
5
  "pad_token": "[PAD]",
6
- "add_prefix_space": false,
7
- "model_type": "gpt2",
8
- "tokenizer_class": "GPT2Tokenizer"
 
 
 
 
 
 
 
9
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "bos_token": "<|endoftext|>",
5
+ "clean_up_tokenization_spaces": true,
6
  "eos_token": "<|endoftext|>",
7
+ "model_max_length": 2048,
8
  "pad_token": "[PAD]",
9
+ "tokenizer_class": "GPT2Tokenizer",
10
+ "additional_special_tokens": [
11
+ "<|vision_16|>",
12
+ "<|logic_32|>",
13
+ "<|lang_ru|>",
14
+ "<|lang_en|>",
15
+ "<|lang_ua|>",
16
+ "<|lang_by|>"
17
+ ],
18
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
19
  }