ezellm-lite-tokenizer / tiktoken.json
TerminatorPower's picture
Upload ezellm-lite tokenizer (24,600 vocab, FIM + repo specials)
36f1b8c verified
{
"pattern": "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
"special_tokens": {
"<|endoftext|>": 24576,
"<|fim_prefix|>": 24577,
"<|fim_middle|>": 24578,
"<|fim_suffix|>": 24579,
"<|fim_pad|>": 24580,
"<|file_sep|>": 24581,
"<|repo_name|>": 24582,
"<|filename|>": 24583,
"<|reserved_0|>": 24584,
"<|reserved_1|>": 24585,
"<|reserved_2|>": 24586,
"<|reserved_3|>": 24587,
"<|reserved_4|>": 24588,
"<|reserved_5|>": 24589,
"<|reserved_6|>": 24590,
"<|reserved_7|>": 24591,
"<|reserved_8|>": 24592,
"<|reserved_9|>": 24593,
"<|reserved_10|>": 24594,
"<|reserved_11|>": 24595,
"<|reserved_12|>": 24596,
"<|reserved_13|>": 24597,
"<|reserved_14|>": 24598,
"<|reserved_15|>": 24599
}
}