moBERTo / tokenizer_config.json
thiagolaitz's picture
initial upload from gs://maritica-us-central1
9d6cf11 verified
Raw
History Blame Contribute Delete
433 Bytes
{
"backend": "tokenizers",
"clean_up_tokenization_spaces": false,
"cls_token": "[CLS]",
"is_local": false,
"mask_token": "[MASK]",
"merges_file": "artifacts/modernbert_bpe_tokenizer_artifacts/merges.txt",
"model_max_length": 8192,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"tokenizer_class": "PreTrainedTokenizerFast",
"unk_token": "[UNK]",
"model_input_names": [
"input_ids",
"attention_mask"
]
}