foxanthis committed on
Commit
c719ef9
·
1 Parent(s): e838f05

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50257,
3
+ " ": 50258,
4
+ " ": 50259,
5
+ " ": 50260
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "\t\t",
4
+ " ",
5
+ " ",
6
+ " "
7
+ ],
8
+ "bos_token": {
9
+ "content": "<|endoftext|>",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ },
15
+ "eos_token": {
16
+ "content": "<|endoftext|>",
17
+ "lstrip": false,
18
+ "normalized": true,
19
+ "rstrip": false,
20
+ "single_word": false
21
+ },
22
+ "unk_token": {
23
+ "content": "<|endoftext|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ }
29
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "\t\t",
5
+ " ",
6
+ " ",
7
+ " "
8
+ ],
9
+ "bos_token": {
10
+ "__type": "AddedToken",
11
+ "content": "<|endoftext|>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "eos_token": {
18
+ "__type": "AddedToken",
19
+ "content": "<|endoftext|>",
20
+ "lstrip": false,
21
+ "normalized": true,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "errors": "replace",
26
+ "model_max_length": 2048,
27
+ "name_or_path": "flax-community/gpt-neo-125M-code-clippy",
28
+ "special_tokens_map_file": null,
29
+ "tokenizer_class": "GPT2Tokenizer",
30
+ "unk_token": {
31
+ "__type": "AddedToken",
32
+ "content": "<|endoftext|>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ }
38
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff