reader-relik-cie-small / tokenizer_config.json
riccorl's picture
Upload tokenizer
33492c0
{
"add_prefix_space": true,
"additional_special_tokens": [
"[R-0]",
"[R-1]",
"[R-2]",
"[R-3]",
"[R-4]",
"[R-5]",
"[R-6]",
"[R-7]",
"[R-8]",
"[R-9]",
"[R-10]",
"[R-11]",
"[R-12]",
"[R-13]",
"[R-14]",
"[R-15]",
"[R-16]",
"[R-17]",
"[R-18]",
"[R-19]",
"--NME--",
"[E-0]",
"[E-1]",
"[E-2]",
"[E-3]",
"[E-4]",
"[E-5]",
"[E-6]",
"[E-7]",
"[E-8]",
"[E-9]",
"[E-10]",
"[E-11]",
"[E-12]",
"[E-13]",
"[E-14]",
"[E-15]",
"[E-16]",
"[E-17]",
"[E-18]",
"[E-19]",
"[E-20]",
"[E-21]",
"[E-22]",
"[E-23]",
"[E-24]"
],
"bos_token": "[CLS]",
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_lower_case": false,
"eos_token": "[SEP]",
"mask_token": "[MASK]",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"sp_model_kwargs": {},
"split_by_punct": false,
"tokenizer_class": "DebertaV2Tokenizer",
"unk_token": "[UNK]",
"vocab_type": "spm"
}