C. Servan committed on
Commit
e51c021
·
1 Parent(s): ffe3a2b

update tokenizer config

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +2 -0
  2. tokenizer_config.json +2 -0
special_tokens_map.json CHANGED
@@ -1,7 +1,9 @@
1
  {
2
  "cls_token": "[CLS]",
 
3
  "mask_token": "[MASK]",
4
  "pad_token": "[PAD]",
5
  "sep_token": "[SEP]",
 
6
  "unk_token": "[UNK]"
7
  }
 
1
  {
2
  "cls_token": "[CLS]",
3
+ "bos_token": "[CLS]",
4
  "mask_token": "[MASK]",
5
  "pad_token": "[PAD]",
6
  "sep_token": "[SEP]",
7
+ "eos_token": "[SEP]",
8
  "unk_token": "[UNK]"
9
  }
tokenizer_config.json CHANGED
@@ -8043,6 +8043,7 @@
8043
  },
8044
  "clean_up_tokenization_spaces": true,
8045
  "cls_token": "[CLS]",
 
8046
  "extra_special_tokens": {},
8047
  "mask_token": "[MASK]",
8048
  "model_input_names": [
@@ -8052,6 +8053,7 @@
8052
  "model_max_length": 8192,
8053
  "pad_token": "[PAD]",
8054
  "sep_token": "[SEP]",
 
8055
  "tokenizer_class": "PreTrainedTokenizer",
8056
  "unk_token": "[UNK]"
8057
  }
 
8043
  },
8044
  "clean_up_tokenization_spaces": true,
8045
  "cls_token": "[CLS]",
8046
+ "bos_token": "[CLS]",
8047
  "extra_special_tokens": {},
8048
  "mask_token": "[MASK]",
8049
  "model_input_names": [
 
8053
  "model_max_length": 8192,
8054
  "pad_token": "[PAD]",
8055
  "sep_token": "[SEP]",
8056
+ "eos_token": "[SEP]",
8057
  "tokenizer_class": "PreTrainedTokenizer",
8058
  "unk_token": "[UNK]"
8059
  }