Azrail committed on
Commit
0a6c273
·
verified ·
1 Parent(s): 34f3cd9

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,4 +1,8 @@
1
  {
 
 
 
 
2
  "bos_token": {
3
  "content": "<|beginoftext|>",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
  "bos_token": {
7
  "content": "<|beginoftext|>",
8
  "lstrip": false,
tokenizer.json CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "<|reserved_token_1|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 3,
35
- "content": "<|reserved_token_2|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -215,8 +215,8 @@
215
  "vocab": {
216
  "<|endoftext|>": 0,
217
  "<|beginoftext|>": 1,
218
- "<|reserved_token_1|>": 2,
219
- "<|reserved_token_2|>": 3,
220
  "<|reserved_token_3|>": 4,
221
  "<|reserved_token_4|>": 5,
222
  "<|reserved_token_5|>": 6,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "<|im_start|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 3,
35
+ "content": "<|im_end|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
215
  "vocab": {
216
  "<|endoftext|>": 0,
217
  "<|beginoftext|>": 1,
218
+ "<|im_start|>": 2,
219
+ "<|im_end|>": 3,
220
  "<|reserved_token_3|>": 4,
221
  "<|reserved_token_4|>": 5,
222
  "<|reserved_token_5|>": 6,
tokenizer_config.json CHANGED
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "<|reserved_token_1|>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -25,7 +25,7 @@
25
  "special": true
26
  },
27
  "3": {
28
- "content": "<|reserved_token_2|>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
@@ -105,7 +105,12 @@
105
  "special": true
106
  }
107
  },
 
 
 
 
108
  "bos_token": "<|beginoftext|>",
 
109
  "clean_up_tokenization_spaces": false,
110
  "eos_token": "<|endoftext|>",
111
  "extra_special_tokens": {},
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "<|im_start|>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
25
  "special": true
26
  },
27
  "3": {
28
+ "content": "<|im_end|>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
105
  "special": true
106
  }
107
  },
108
+ "additional_special_tokens": [
109
+ "<|im_start|>",
110
+ "<|im_end|>"
111
+ ],
112
  "bos_token": "<|beginoftext|>",
113
+ "chat_template": "{% for message in messages %}{% if message.get('role') is not none %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% else %}{{message['content'] + '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
114
  "clean_up_tokenization_spaces": false,
115
  "eos_token": "<|endoftext|>",
116
  "extra_special_tokens": {},