dakadkart committed on
Commit
08f8ebb
·
1 Parent(s): 097400d

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
4
  "unk_token": "<|endoftext|>"
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>"
4
+ ],
5
  "bos_token": "<|endoftext|>",
6
  "eos_token": "<|endoftext|>",
7
  "unk_token": "<|endoftext|>"
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": true,
13
  "special": true
14
  }
15
  ],
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
  "special": true
14
  }
15
  ],
tokenizer_config.json CHANGED
@@ -5,13 +5,15 @@
5
  "50256": {
6
  "content": "<|endoftext|>",
7
  "lstrip": false,
8
- "normalized": true,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
  }
13
  },
14
- "additional_special_tokens": [],
 
 
15
  "bos_token": "<|endoftext|>",
16
  "clean_up_tokenization_spaces": true,
17
  "eos_token": "<|endoftext|>",
 
5
  "50256": {
6
  "content": "<|endoftext|>",
7
  "lstrip": false,
8
+ "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
  }
13
  },
14
+ "additional_special_tokens": [
15
+ "<|endoftext|>"
16
+ ],
17
  "bos_token": "<|endoftext|>",
18
  "clean_up_tokenization_spaces": true,
19
  "eos_token": "<|endoftext|>",