sujithatz committed on
Commit
9c093c9
·
verified ·
1 Parent(s): 357bf77

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -29
  2. tokenizer_config.json +3 -3
tokenizer.json CHANGED
@@ -155,12 +155,6 @@
155
  "id": "A",
156
  "type_id": 0
157
  }
158
- },
159
- {
160
- "SpecialToken": {
161
- "id": "<|endoftext|>",
162
- "type_id": 0
163
- }
164
  }
165
  ],
166
  "pair": [
@@ -170,36 +164,14 @@
170
  "type_id": 0
171
  }
172
  },
173
- {
174
- "SpecialToken": {
175
- "id": "<|endoftext|>",
176
- "type_id": 0
177
- }
178
- },
179
  {
180
  "Sequence": {
181
  "id": "B",
182
  "type_id": 1
183
  }
184
- },
185
- {
186
- "SpecialToken": {
187
- "id": "<|endoftext|>",
188
- "type_id": 1
189
- }
190
  }
191
  ],
192
- "special_tokens": {
193
- "<|endoftext|>": {
194
- "id": "<|endoftext|>",
195
- "ids": [
196
- 32000
197
- ],
198
- "tokens": [
199
- "<|endoftext|>"
200
- ]
201
- }
202
- }
203
  },
204
  "decoder": {
205
  "type": "Sequence",
 
155
  "id": "A",
156
  "type_id": 0
157
  }
 
 
 
 
 
 
158
  }
159
  ],
160
  "pair": [
 
164
  "type_id": 0
165
  }
166
  },
 
 
 
 
 
 
167
  {
168
  "Sequence": {
169
  "id": "B",
170
  "type_id": 1
171
  }
 
 
 
 
 
 
172
  }
173
  ],
174
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
175
  },
176
  "decoder": {
177
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": false,
3
- "add_eos_token": true,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
@@ -121,9 +121,9 @@
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,
124
- "model_max_length": 131072,
125
  "pad_token": "<unk>",
126
- "padding_side": "left",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",
 
1
  {
2
  "add_bos_token": false,
3
+ "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
 
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,
124
+ "model_max_length": 300,
125
  "pad_token": "<unk>",
126
+ "padding_side": "right",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",