nursimakgul committed on
Commit
0e546fa
·
verified ·
1 Parent(s): 9ed1ee8

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +21 -0
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 50000,
3
+ "special_tokens": [
4
+ "[PAD]",
5
+ "[UNK]",
6
+ "[CLS]",
7
+ "[SEP]",
8
+ "[MASK]",
9
+ "[BOS]",
10
+ "[EOS]",
11
+ "<|im_start|>",
12
+ "<|im_end|>",
13
+ "<|system|>",
14
+ "<|user|>",
15
+ "<|assistant|>"
16
+ ],
17
+ "model_type": "BPE",
18
+ "training_corpus": "/content/drive/MyDrive/turkish_nlp_project/FINAL_TURKISH_CORPUS.txt",
19
+ "training_date": "2025-09-06 13:37:54",
20
+ "corpus_size_mb": 19.955299377441406
21
+ }