NLP_BPE / tokenizer_config.json
dar5115's picture
Upload 3 files
7671e71 verified
{
"tokenizer_class": "PreTrainedTokenizerFast",
"auto_map": {
"AutoTokenizer": [
"tokenizers.Tokenizer",
null
]
},
"model_type": "bpe",
"vocab_size": 32000,
"unk_token": "[UNK]",
"special_tokens": {
"unk_token": "[UNK]",
"additional_special_tokens": [
"<NUM>",
"<URL>",
"<EMAIL>"
]
},
"model_max_length": 512,
"padding_side": "right",
"truncation_side": "right",
"clean_up_tokenization_spaces": true
}