byte_tokenizer / config.json
cameron-git's picture
Upload folder using huggingface_hub
a6d36a1 verified
{
"bos_token": "<|bos|>",
"eos_token": "<|eos|>",
"unk_token": "<|unk|>",
"sep_token": "<|sep|>",
"pad_token": "<|pad|>",
"cls_token": "<|cls|>",
"mask_token": "<|mask|>",
"clean_up_tokenization_spaces": true,
"model_input_names": ["input_ids", "attention_mask"],
"model_max_length": 4096,
"tokenizer_class": "PreTrainedTokenizerFast"
}