estebancarlin committed on
Commit
6fc4303
·
verified ·
1 Parent(s): 9329e33

Upload complete GPT-2 tokenizer_config.json to main

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +15 -12
tokenizer_config.json CHANGED
@@ -1,6 +1,18 @@
1
  {
2
- "add_bos_token": false,
 
 
 
 
 
 
 
 
 
 
3
  "add_prefix_space": false,
 
 
4
  "added_tokens_decoder": {
5
  "50256": {
6
  "content": "<|endoftext|>",
@@ -10,14 +22,5 @@
10
  "single_word": false,
11
  "special": true
12
  }
13
- },
14
- "bos_token": "<|endoftext|>",
15
- "clean_up_tokenization_spaces": false,
16
- "eos_token": "<|endoftext|>",
17
- "errors": "replace",
18
- "extra_special_tokens": {},
19
- "model_max_length": 1024,
20
- "pad_token": "<|endoftext|>",
21
- "tokenizer_class": "GPT2Tokenizer",
22
- "unk_token": "<|endoftext|>"
23
- }
 
1
  {
2
+ "tokenizer_class": "GPT2Tokenizer",
3
+ "auto_map": {
4
+ "AutoTokenizer": [
5
+ "transformers",
6
+ "GPT2Tokenizer"
7
+ ]
8
+ },
9
+ "bos_token": "<|endoftext|>",
10
+ "eos_token": "<|endoftext|>",
11
+ "pad_token": "<|endoftext|>",
12
+ "unk_token": "<|endoftext|>",
13
  "add_prefix_space": false,
14
+ "model_max_length": 1024,
15
+ "clean_up_tokenization_spaces": true,
16
  "added_tokens_decoder": {
17
  "50256": {
18
  "content": "<|endoftext|>",
 
22
  "single_word": false,
23
  "special": true
24
  }
25
+ }
26
+ }