zxc4wewewe committed on
Commit
6e5c656
·
verified ·
1 Parent(s): 475dc26

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|eom_id|>": 32769,
3
+ "<|eot_id|>": 32768
4
+ }
special_tokens_map.json CHANGED
@@ -1,4 +1,13 @@
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
@@ -7,13 +16,19 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
 
 
 
 
 
 
 
10
  "content": "</s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "</s>",
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|eom_id|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
  "bos_token": {
12
  "content": "<s>",
13
  "lstrip": false,
 
16
  "single_word": false
17
  },
18
  "eos_token": {
19
+ "content": "<|eot_id|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
  "content": "</s>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
30
  "single_word": false
31
  },
 
32
  "unk_token": {
33
  "content": "<unk>",
34
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
3
- size 3671968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:207b7554d70eb387ac7cbd081d396c5caa7fa8b5db7a665a17f9ebc7e12a4a68
3
+ size 3672340
tokenizer_config.json CHANGED
@@ -6170,11 +6170,30 @@
6170
  "rstrip": false,
6171
  "single_word": false,
6172
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6173
  }
6174
  },
 
 
 
6175
  "bos_token": "<s>",
6176
  "clean_up_tokenization_spaces": false,
6177
- "eos_token": "</s>",
6178
  "extra_special_tokens": {},
6179
  "legacy": false,
6180
  "model_max_length": 1000000000000000019884624838656,
 
6170
  "rstrip": false,
6171
  "single_word": false,
6172
  "special": true
6173
+ },
6174
+ "32768": {
6175
+ "content": "<|eot_id|>",
6176
+ "lstrip": false,
6177
+ "normalized": false,
6178
+ "rstrip": false,
6179
+ "single_word": false,
6180
+ "special": true
6181
+ },
6182
+ "32769": {
6183
+ "content": "<|eom_id|>",
6184
+ "lstrip": false,
6185
+ "normalized": false,
6186
+ "rstrip": false,
6187
+ "single_word": false,
6188
+ "special": true
6189
  }
6190
  },
6191
+ "additional_special_tokens": [
6192
+ "<|eom_id|>"
6193
+ ],
6194
  "bos_token": "<s>",
6195
  "clean_up_tokenization_spaces": false,
6196
+ "eos_token": "<|eot_id|>",
6197
  "extra_special_tokens": {},
6198
  "legacy": false,
6199
  "model_max_length": 1000000000000000019884624838656,