dar5115 committed on
Commit
7671e71
·
verified ·
1 Parent(s): d246ab1

Upload 3 files

Browse files
Files changed (3) hide show
  1. merges.txt +0 -0
  2. tokenizer_config.json +24 -0
  3. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "PreTrainedTokenizerFast",
3
+ "auto_map": {
4
+ "AutoTokenizer": [
5
+ "tokenizers.Tokenizer",
6
+ null
7
+ ]
8
+ },
9
+ "model_type": "bpe",
10
+ "vocab_size": 32000,
11
+ "unk_token": "[UNK]",
12
+ "special_tokens": {
13
+ "unk_token": "[UNK]",
14
+ "additional_special_tokens": [
15
+ "<NUM>",
16
+ "<URL>",
17
+ "<EMAIL>"
18
+ ]
19
+ },
20
+ "model_max_length": 512,
21
+ "padding_side": "right",
22
+ "truncation_side": "right",
23
+ "clean_up_tokenization_spaces": true
24
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff