ogma-micro / tokenizer_config.json
Antreas's picture
Add OgmaTokenizerFast + model.embed() high-level API
f552828
raw
history blame contribute delete
428 Bytes
{
  "tokenizer_class": "OgmaTokenizerFast",
  "auto_map": {
    "AutoTokenizer": [
      null,
      "tokenization_ogma.OgmaTokenizerFast"
    ]
  },
  "model_max_length": 1024,
  "padding_side": "right",
  "pad_token": "<pad>",
  "unk_token": "<unk>",
  "cls_token": "[CLS]",
  "sep_token": "[SEP]",
  "bos_token": "[CLS]",
  "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "do_lower_case": true,
  "backend": "tokenizers"
}