Taratep committed on
Commit
3a80de9
·
verified ·
1 Parent(s): e0d14d6

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "<|endoftext|>": 50265,
3
- "<|pad|>": 50268,
4
  "<|startoftext|>": 50266,
5
  "<|unk|>": 50267
6
  }
 
1
  {
2
  "<|endoftext|>": 50265,
 
3
  "<|startoftext|>": 50266,
4
  "<|unk|>": 50267
5
  }
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "<|pad|>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "unk_token": {
24
  "content": "<|unk|>",
25
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<|endoftext|>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<|unk|>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -65,14 +65,6 @@
65
  "rstrip": false,
66
  "single_word": false,
67
  "special": true
68
- },
69
- "50268": {
70
- "content": "<|pad|>",
71
- "lstrip": false,
72
- "normalized": true,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
  }
77
  },
78
  "bos_token": "<|startoftext|>",
@@ -81,7 +73,7 @@
81
  "errors": "replace",
82
  "extra_special_tokens": {},
83
  "model_max_length": 1000000000000000019884624838656,
84
- "pad_token": "<|pad|>",
85
  "tokenizer_class": "GPT2Tokenizer",
86
  "unk_token": "<|unk|>"
87
  }
 
65
  "rstrip": false,
66
  "single_word": false,
67
  "special": true
 
 
 
 
 
 
 
 
68
  }
69
  },
70
  "bos_token": "<|startoftext|>",
 
73
  "errors": "replace",
74
  "extra_special_tokens": {},
75
  "model_max_length": 1000000000000000019884624838656,
76
+ "pad_token": "<|endoftext|>",
77
  "tokenizer_class": "GPT2Tokenizer",
78
  "unk_token": "<|unk|>"
79
  }