QuarkTokenizer / special_tokens_map.json
ThingsAI's picture
Quark BPE tokenizer EN+IT 65536 vocab v1
9fb41e6 verified
{
"additional_special_tokens": [
"<|system|>",
"<|user|>",
"<|assistant|>",
"<|endofturn|>",
"<|thinking|>",
"<|/thinking|>",
"<|reserved_0|>",
"<|reserved_1|>",
"<|reserved_2|>",
"<|reserved_3|>",
"<|reserved_4|>",
"<|reserved_5|>",
"<|reserved_6|>",
"<|reserved_7|>",
"<|reserved_8|>",
"<|reserved_9|>",
"<|reserved_10|>",
"<|reserved_11|>",
"<|reserved_12|>",
"<|reserved_13|>",
"<|reserved_14|>",
"<|reserved_15|>",
"<|reserved_16|>",
"<|reserved_17|>",
"<|reserved_18|>",
"<|reserved_19|>",
"<|reserved_20|>",
"<|reserved_21|>",
"<|reserved_22|>",
"<|reserved_23|>",
"<|reserved_24|>",
"<|reserved_25|>",
"<|reserved_26|>",
"<|reserved_27|>",
"<|reserved_28|>",
"<|reserved_29|>",
"<|reserved_30|>",
"<|reserved_31|>",
"<|reserved_32|>",
"<|reserved_33|>",
"<|reserved_34|>",
"<|reserved_35|>",
"<|reserved_36|>",
"<|reserved_37|>",
"<|reserved_38|>",
"<|reserved_39|>",
"<|reserved_40|>",
"<|reserved_41|>",
"<|reserved_42|>",
"<|reserved_43|>",
"<|reserved_44|>",
"<|reserved_45|>",
"<|reserved_46|>",
"<|reserved_47|>",
"<|reserved_48|>",
"<|reserved_49|>",
"<|reserved_50|>",
"<|reserved_51|>",
"<|reserved_52|>",
"<|reserved_53|>"
],
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}