mrinaldi committed on
Commit
97110aa
·
verified ·
1 Parent(s): ec59547

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +6 -12
tokenizer_config.json CHANGED
@@ -152,20 +152,14 @@
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
- },
156
- "32805": {
157
- "content": "<|endoftext|>",
158
- "lstrip": false,
159
- "normalized": false,
160
- "rstrip": false,
161
- "single_word": false,
162
- "special": true
163
  }
164
  },
165
- "bos_token": "<|endoftext|>",
 
 
 
 
166
  "clean_up_tokenization_spaces": false,
167
- "eos_token": "<|endoftext|>",
168
  "model_max_length": 1000000000000000019884624838656,
169
- "tokenizer_class": "GPT2Tokenizer",
170
- "unk_token": "<|endoftext|>"
171
  }
 
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
 
 
 
 
 
 
 
 
155
  }
156
  },
157
+ "bos_token": "[BOS]",
158
+ "eos_token": "[EOS]",
159
+ "mask_token": "[MASK]",
160
+ "pad_token": "[PAD]",
161
+ "unk_token": "[UNK]",
162
  "clean_up_tokenization_spaces": false,
 
163
  "model_max_length": 1000000000000000019884624838656,
164
+ "tokenizer_class": "GPT2Tokenizer"
 
165
  }