Make the tokenizer config match that of the large and base versions (#11)

- Make the tokenizer config match that of the large and base versions (f9611f088d69fc3157ff1878217feee72bda0145)

Co-authored-by: Santiago Castro <bryant1410@users.noreply.huggingface.co>

Files changed (1)

tokenizer_config.json +1 -3

tokenizer_config.json (changed)

@@ -1,11 +1,9 @@
 {
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
-  "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,

 {
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,