turkish-tokenizer / tokenizer_config.json
nursimakgul's picture
Upload tokenizer_config.json with huggingface_hub
0e546fa verified
{
"vocab_size": 50000,
"special_tokens": [
"[PAD]",
"[UNK]",
"[CLS]",
"[SEP]",
"[MASK]",
"[BOS]",
"[EOS]",
"<|im_start|>",
"<|im_end|>",
"<|system|>",
"<|user|>",
"<|assistant|>"
],
"model_type": "BPE",
"training_corpus": "/content/drive/MyDrive/turkish_nlp_project/FINAL_TURKISH_CORPUS.txt",
"training_date": "2025-09-06 13:37:54",
"corpus_size_mb": 19.955299377441406
}