Transformers
Volowan committed on
Commit
e8318f3
·
verified ·
1 Parent(s): 69aff18

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +133 -132
  2. tokenizer_config.json +5 -5
tokenizer.json CHANGED
@@ -13,7 +13,7 @@
13
  "special": true
14
  },
15
  {
16
- "id": 254,
17
  "content": "[bos]",
18
  "single_word": false,
19
  "lstrip": false,
@@ -22,7 +22,7 @@
22
  "special": true
23
  },
24
  {
25
- "id": 255,
26
  "content": "[pad]",
27
  "single_word": false,
28
  "lstrip": false,
@@ -31,7 +31,7 @@
31
  "special": true
32
  },
33
  {
34
- "id": 256,
35
  "content": "[eos]",
36
  "single_word": false,
37
  "lstrip": false,
@@ -40,7 +40,7 @@
40
  "special": true
41
  },
42
  {
43
- "id": 257,
44
  "content": "[sep]",
45
  "single_word": false,
46
  "lstrip": false,
@@ -49,7 +49,7 @@
49
  "special": true
50
  },
51
  {
52
- "id": 258,
53
  "content": "[unk]",
54
  "single_word": false,
55
  "lstrip": false,
@@ -212,133 +212,134 @@
212
  "H": 129,
213
  "H2": 130,
214
  "H3": 131,
215
- "[": 132,
216
- "]": 133,
217
- ":": 134,
218
- "=": 135,
219
- "#": 136,
220
- "$": 137,
221
- "\\": 138,
222
- "/": 139,
223
- "(": 140,
224
- ")": 141,
225
- ".": 142,
226
- "[prod]": 143,
227
- "[reac]": 144,
228
- "[mech]": 145,
229
- "+": 146,
230
- "-": 147,
231
- "@": 148,
232
- "*": 149,
233
- "0": 150,
234
- "1": 151,
235
- "2": 152,
236
- "3": 153,
237
- "4": 154,
238
- "5": 155,
239
- "6": 156,
240
- "7": 157,
241
- "8": 158,
242
- "9": 159,
243
- "10": 160,
244
- "11": 161,
245
- "12": 162,
246
- "13": 163,
247
- "14": 164,
248
- "15": 165,
249
- "16": 166,
250
- "17": 167,
251
- "18": 168,
252
- "19": 169,
253
- "20": 170,
254
- "21": 171,
255
- "22": 172,
256
- "23": 173,
257
- "24": 174,
258
- "25": 175,
259
- "26": 176,
260
- "27": 177,
261
- "28": 178,
262
- "29": 179,
263
- "30": 180,
264
- "31": 181,
265
- "32": 182,
266
- "33": 183,
267
- "34": 184,
268
- "35": 185,
269
- "36": 186,
270
- "37": 187,
271
- "38": 188,
272
- "39": 189,
273
- "40": 190,
274
- "41": 191,
275
- "42": 192,
276
- "43": 193,
277
- "44": 194,
278
- "45": 195,
279
- "46": 196,
280
- "47": 197,
281
- "48": 198,
282
- "49": 199,
283
- "50": 200,
284
- "51": 201,
285
- "52": 202,
286
- "53": 203,
287
- "54": 204,
288
- "55": 205,
289
- "56": 206,
290
- "57": 207,
291
- "58": 208,
292
- "59": 209,
293
- "60": 210,
294
- "61": 211,
295
- "62": 212,
296
- "63": 213,
297
- "64": 214,
298
- "65": 215,
299
- "66": 216,
300
- "67": 217,
301
- "68": 218,
302
- "69": 219,
303
- "70": 220,
304
- "71": 221,
305
- "72": 222,
306
- "73": 223,
307
- "74": 224,
308
- "75": 225,
309
- "76": 226,
310
- "77": 227,
311
- "78": 228,
312
- "79": 229,
313
- "80": 230,
314
- "81": 231,
315
- "82": 232,
316
- "83": 233,
317
- "84": 234,
318
- "85": 235,
319
- "86": 236,
320
- "87": 237,
321
- "88": 238,
322
- "89": 239,
323
- "90": 240,
324
- "91": 241,
325
- "92": 242,
326
- "93": 243,
327
- "94": 244,
328
- "95": 245,
329
- "96": 246,
330
- "97": 247,
331
- "98": 248,
332
- "99": 249,
333
- "%": 250,
334
- ",": 251,
335
- ";": 252,
336
- "|": 253,
337
- "[bos]": 254,
338
- "[pad]": 255,
339
- "[eos]": 256,
340
- "[sep]": 257,
341
- "[unk]": 258
 
342
  },
343
  "unk_token": "[unk]"
344
  }
 
13
  "special": true
14
  },
15
  {
16
+ "id": 255,
17
  "content": "[bos]",
18
  "single_word": false,
19
  "lstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 256,
26
  "content": "[pad]",
27
  "single_word": false,
28
  "lstrip": false,
 
31
  "special": true
32
  },
33
  {
34
+ "id": 257,
35
  "content": "[eos]",
36
  "single_word": false,
37
  "lstrip": false,
 
40
  "special": true
41
  },
42
  {
43
+ "id": 258,
44
  "content": "[sep]",
45
  "single_word": false,
46
  "lstrip": false,
 
49
  "special": true
50
  },
51
  {
52
+ "id": 259,
53
  "content": "[unk]",
54
  "single_word": false,
55
  "lstrip": false,
 
212
  "H": 129,
213
  "H2": 130,
214
  "H3": 131,
215
+ "H4": 132,
216
+ "[": 133,
217
+ "]": 134,
218
+ ":": 135,
219
+ "=": 136,
220
+ "#": 137,
221
+ "$": 138,
222
+ "\\": 139,
223
+ "/": 140,
224
+ "(": 141,
225
+ ")": 142,
226
+ ".": 143,
227
+ "[prod]": 144,
228
+ "[reac]": 145,
229
+ "[mech]": 146,
230
+ "+": 147,
231
+ "-": 148,
232
+ "@": 149,
233
+ "*": 150,
234
+ "0": 151,
235
+ "1": 152,
236
+ "2": 153,
237
+ "3": 154,
238
+ "4": 155,
239
+ "5": 156,
240
+ "6": 157,
241
+ "7": 158,
242
+ "8": 159,
243
+ "9": 160,
244
+ "10": 161,
245
+ "11": 162,
246
+ "12": 163,
247
+ "13": 164,
248
+ "14": 165,
249
+ "15": 166,
250
+ "16": 167,
251
+ "17": 168,
252
+ "18": 169,
253
+ "19": 170,
254
+ "20": 171,
255
+ "21": 172,
256
+ "22": 173,
257
+ "23": 174,
258
+ "24": 175,
259
+ "25": 176,
260
+ "26": 177,
261
+ "27": 178,
262
+ "28": 179,
263
+ "29": 180,
264
+ "30": 181,
265
+ "31": 182,
266
+ "32": 183,
267
+ "33": 184,
268
+ "34": 185,
269
+ "35": 186,
270
+ "36": 187,
271
+ "37": 188,
272
+ "38": 189,
273
+ "39": 190,
274
+ "40": 191,
275
+ "41": 192,
276
+ "42": 193,
277
+ "43": 194,
278
+ "44": 195,
279
+ "45": 196,
280
+ "46": 197,
281
+ "47": 198,
282
+ "48": 199,
283
+ "49": 200,
284
+ "50": 201,
285
+ "51": 202,
286
+ "52": 203,
287
+ "53": 204,
288
+ "54": 205,
289
+ "55": 206,
290
+ "56": 207,
291
+ "57": 208,
292
+ "58": 209,
293
+ "59": 210,
294
+ "60": 211,
295
+ "61": 212,
296
+ "62": 213,
297
+ "63": 214,
298
+ "64": 215,
299
+ "65": 216,
300
+ "66": 217,
301
+ "67": 218,
302
+ "68": 219,
303
+ "69": 220,
304
+ "70": 221,
305
+ "71": 222,
306
+ "72": 223,
307
+ "73": 224,
308
+ "74": 225,
309
+ "75": 226,
310
+ "76": 227,
311
+ "77": 228,
312
+ "78": 229,
313
+ "79": 230,
314
+ "80": 231,
315
+ "81": 232,
316
+ "82": 233,
317
+ "83": 234,
318
+ "84": 235,
319
+ "85": 236,
320
+ "86": 237,
321
+ "87": 238,
322
+ "88": 239,
323
+ "89": 240,
324
+ "90": 241,
325
+ "91": 242,
326
+ "92": 243,
327
+ "93": 244,
328
+ "94": 245,
329
+ "95": 246,
330
+ "96": 247,
331
+ "97": 248,
332
+ "98": 249,
333
+ "99": 250,
334
+ "%": 251,
335
+ ",": 252,
336
+ ";": 253,
337
+ "|": 254,
338
+ "[bos]": 255,
339
+ "[pad]": 256,
340
+ "[eos]": 257,
341
+ "[sep]": 258,
342
+ "[unk]": 259
343
  },
344
  "unk_token": "[unk]"
345
  }
tokenizer_config.json CHANGED
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": true
10
  },
11
- "254": {
12
  "content": "[bos]",
13
  "lstrip": false,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "255": {
20
  "content": "[pad]",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "256": {
28
  "content": "[eos]",
29
  "lstrip": false,
30
  "normalized": false,
@@ -32,7 +32,7 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "257": {
36
  "content": "[sep]",
37
  "lstrip": false,
38
  "normalized": false,
@@ -40,7 +40,7 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "258": {
44
  "content": "[unk]",
45
  "lstrip": false,
46
  "normalized": false,
 
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "255": {
12
  "content": "[bos]",
13
  "lstrip": false,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "256": {
20
  "content": "[pad]",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "257": {
28
  "content": "[eos]",
29
  "lstrip": false,
30
  "normalized": false,
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "258": {
36
  "content": "[sep]",
37
  "lstrip": false,
38
  "normalized": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "259": {
44
  "content": "[unk]",
45
  "lstrip": false,
46
  "normalized": false,