Quark-135m-Bilingual / tokenizer_config.json
ThingsAI's picture
Upload 5 files
d9d2313 verified
raw
history blame contribute delete
549 Bytes
{
  "backend": "tokenizers",
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "created_by": "OvercastLab",
  "description": "Quark bilingual EN+IT tokenizer — BPE byte-level 65536 vocab",
  "eos_token": "</s>",
  "extra_special_tokens": ["<|user|>", "<|assistant|>", "<|end|>"],
  "is_local": true,
  "languages": ["en", "it"],
  "local_files_only": false,
  "model_max_length": 2048,
  "pad_token": "<pad>",
  "padding_side": "right",
  "tokenizer_class": "TokenizersBackend",
  "unk_token": "<unk>"
}