panf9 committed on
Commit
68e7700
·
verified ·
1 Parent(s): 0e70642

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +2 -1
special_tokens_map.json CHANGED
@@ -53,6 +53,7 @@
53
  "rstrip": false,
54
  "single_word": false
55
  },
 
56
  "unk_token": {
57
  "content": "<|endoftext|>",
58
  "lstrip": false,
 
53
  "rstrip": false,
54
  "single_word": false
55
  },
56
+ "pad_token": "<|endoftext|>",
57
  "unk_token": {
58
  "content": "<|endoftext|>",
59
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -350,7 +350,8 @@
350
  "clean_up_tokenization_spaces": true,
351
  "eos_token": "<|endoftext|>",
352
  "extra_special_tokens": {},
353
- "model_max_length": 1000000000000000019884624838656,
 
354
  "tokenizer_class": "GPT2Tokenizer",
355
  "unk_token": "<|endoftext|>",
356
  "vocab_size": 49152
 
350
  "clean_up_tokenization_spaces": true,
351
  "eos_token": "<|endoftext|>",
352
  "extra_special_tokens": {},
353
+ "model_max_length": 2048,
354
+ "pad_token": "<|endoftext|>",
355
  "tokenizer_class": "GPT2Tokenizer",
356
  "unk_token": "<|endoftext|>",
357
  "vocab_size": 49152