nursimakgul
/

turkish-tokenizer

+{
+  "vocab_size": 50000,
+  "special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]",
+    "[BOS]",
+    "[EOS]",
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>"
+  ],
+  "model_type": "BPE",
+  "training_corpus": "/content/drive/MyDrive/turkish_nlp_project/FINAL_TURKISH_CORPUS.txt",
+  "training_date": "2025-09-06 13:37:54",
+  "corpus_size_mb": 19.955299377441406
+}