lemms committed on
Commit
a096af4
·
verified ·
1 Parent(s): 14c27b1

Upload llm\data\tokenizer\tokenizer_config.json with huggingface_hub

Browse files
llm//data//tokenizer//tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "SentencePieceTokenizer",
3
+ "model_type": "bpe",
4
+ "vocab_size": 32000,
5
+ "model_file": "tokenizer.model",
6
+ "special_tokens": {
7
+ "pad_token": "<pad>",
8
+ "unk_token": "<unk>",
9
+ "bos_token": "<s>",
10
+ "eos_token": "</s>"
11
+ },
12
+ "special_token_ids": {
13
+ "pad_token_id": 0,
14
+ "unk_token_id": 1,
15
+ "bos_token_id": 2,
16
+ "eos_token_id": 3
17
+ }
18
+ }