devngho committed on
Commit 01abd33 · verified · 1 Parent(s): 6c7a6ba

Upload tokenizer

Files changed (3)
  1. README.md +1 -1
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +5 -15
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
- library_name: transformers
  language:
  - ko
+ library_name: transformers
  ---

  based on phi-3 tokenizer, expanded 16853 tokens
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -114,19 +114,9 @@
    }
  },
  "bos_token": "<s>",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|endoftext|>",
- "max_length": 4096,
- "model_max_length": 4096,
- "pad_to_multiple_of": null,
- "pad_token": "<unk>",
- "pad_token_type_id": 0,
- "padding_side": "left",
- "sp_model_kwargs": {},
- "stride": 0,
- "tokenizer_class": "LlamaTokenizer",
- "truncation_side": "right",
- "truncation_strategy": "longest_first",
- "unk_token": "<unk>",
- "use_default_system_prompt": false
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "</s>",
+ "model_max_length": 1000000000000000019884624838656,
+ "tokenizer_class": "PreTrainedTokenizerFast",
+ "unk_token": "<unk>"
  }
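
For reference, a minimal sketch of loading the uploaded tokenizer with 🤗 Transformers. The repo id below is a placeholder, since the commit page does not name the repository; the printed values reflect the special tokens this change declares (`<s>`, `</s>`, `<unk>`).

```python
from transformers import AutoTokenizer

# Placeholder repo id -- substitute the actual "user/name" path of this tokenizer repo.
tok = AutoTokenizer.from_pretrained("devngho/expanded-phi-3-tokenizer")

# After this commit, tokenizer_config.json declares PreTrainedTokenizerFast,
# so loading goes through the fast, tokenizer.json-backed path.
print(tok.bos_token, tok.eos_token, tok.unk_token)  # <s> </s> <unk>
print(len(tok))  # Phi-3 vocabulary plus the 16853 added tokens
```

Note that the very large `model_max_length` is the library's default sentinel for "no length limit recorded", so callers that truncate should pass their own `max_length`.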