Samuael committed on
Commit
64eb545
·
verified ·
1 Parent(s): 8afcf0e

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +0 -34
  2. tokenizer_config.json +2 -28
special_tokens_map.json CHANGED
@@ -1,41 +1,7 @@
1
  {
2
- "additional_special_tokens": [
3
- "ar_AR",
4
- "cs_CZ",
5
- "de_DE",
6
- "en_XX",
7
- "es_XX",
8
- "et_EE",
9
- "fi_FI",
10
- "fr_XX",
11
- "gu_IN",
12
- "hi_IN",
13
- "it_IT",
14
- "ja_XX",
15
- "kk_KZ",
16
- "ko_KR",
17
- "lt_LT",
18
- "lv_LV",
19
- "my_MM",
20
- "ne_NP",
21
- "nl_XX",
22
- "ro_RO",
23
- "ru_RU",
24
- "si_LK",
25
- "tr_TR",
26
- "vi_VN",
27
- "zh_CN"
28
- ],
29
  "bos_token": "<s>",
30
  "cls_token": "<s>",
31
  "eos_token": "</s>",
32
- "mask_token": {
33
- "content": "<mask>",
34
- "lstrip": true,
35
- "normalized": false,
36
- "rstrip": false,
37
- "single_word": false
38
- },
39
  "pad_token": "<pad>",
40
  "sep_token": "</s>",
41
  "unk_token": "<unk>"
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "cls_token": "<s>",
4
  "eos_token": "</s>",
 
 
 
 
 
 
 
5
  "pad_token": "<pad>",
6
  "sep_token": "</s>",
7
  "unk_token": "<unk>"
tokenizer_config.json CHANGED
@@ -241,38 +241,12 @@
241
  "special": true
242
  }
243
  },
244
- "additional_special_tokens": [
245
- "ar_AR",
246
- "cs_CZ",
247
- "de_DE",
248
- "en_XX",
249
- "es_XX",
250
- "et_EE",
251
- "fi_FI",
252
- "fr_XX",
253
- "gu_IN",
254
- "hi_IN",
255
- "it_IT",
256
- "ja_XX",
257
- "kk_KZ",
258
- "ko_KR",
259
- "lt_LT",
260
- "lv_LV",
261
- "my_MM",
262
- "ne_NP",
263
- "nl_XX",
264
- "ro_RO",
265
- "ru_RU",
266
- "si_LK",
267
- "tr_TR",
268
- "vi_VN",
269
- "zh_CN"
270
- ],
271
  "bos_token": "<s>",
272
  "clean_up_tokenization_spaces": true,
273
  "cls_token": "<s>",
274
  "eos_token": "</s>",
275
- "mask_token": "<mask>",
276
  "model_max_length": 1000000000000000019884624838656,
277
  "pad_token": "<pad>",
278
  "sep_token": "</s>",
 
241
  "special": true
242
  }
243
  },
244
+ "additional_special_tokens": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  "bos_token": "<s>",
246
  "clean_up_tokenization_spaces": true,
247
  "cls_token": "<s>",
248
  "eos_token": "</s>",
249
+ "mask_token": null,
250
  "model_max_length": 1000000000000000019884624838656,
251
  "pad_token": "<pad>",
252
  "sep_token": "</s>",