Transformers
Eli2381 committed on
Commit
18c6ab3
·
verified ·
1 Parent(s): 9df32ff

Upload tokenizer

Browse files
Files changed (4) hide show
  1. special_tokens_map.json +7 -0
  2. tokenizer.json +316 -306
  3. tokenizer_config.json +10 -1
  4. vocab.json +1 -1
special_tokens_map.json CHANGED
@@ -19,6 +19,13 @@
19
  "rstrip": false,
20
  "single_word": false
21
  },
 
 
 
 
 
 
 
22
  "unk_token": {
23
  "content": "UNK",
24
  "lstrip": false,
 
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
+ "pad_token": {
23
+ "content": "PAD",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
  "unk_token": {
30
  "content": "UNK",
31
  "lstrip": false,
tokenizer.json CHANGED
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 6,
62
- "content": "EoT",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,6 +68,15 @@
68
  },
69
  {
70
  "id": 7,
 
 
 
 
 
 
 
 
 
71
  "content": "BoT",
72
  "single_word": false,
73
  "lstrip": false,
@@ -111,311 +120,312 @@
111
  "BoS": 3,
112
  "EoS": 4,
113
  "UNK": 5,
114
- "EoT": 6,
115
- "BoT": 7,
116
- "!": 8,
117
- "\"": 9,
118
- "#": 10,
119
- "$": 11,
120
- "%": 12,
121
- "&": 13,
122
- "'": 14,
123
- "(": 15,
124
- ")": 16,
125
- "*": 17,
126
- "+": 18,
127
- ",": 19,
128
- "-": 20,
129
- ".": 21,
130
- "/": 22,
131
- "0": 23,
132
- "1": 24,
133
- "2": 25,
134
- "3": 26,
135
- "4": 27,
136
- "5": 28,
137
- "6": 29,
138
- "7": 30,
139
- "8": 31,
140
- "9": 32,
141
- ":": 33,
142
- ";": 34,
143
- "<": 35,
144
- "=": 36,
145
- ">": 37,
146
- "?": 38,
147
- "@": 39,
148
- "A": 40,
149
- "B": 41,
150
- "C": 42,
151
- "D": 43,
152
- "E": 44,
153
- "F": 45,
154
- "G": 46,
155
- "H": 47,
156
- "I": 48,
157
- "J": 49,
158
- "K": 50,
159
- "L": 51,
160
- "M": 52,
161
- "N": 53,
162
- "O": 54,
163
- "P": 55,
164
- "Q": 56,
165
- "R": 57,
166
- "S": 58,
167
- "T": 59,
168
- "U": 60,
169
- "V": 61,
170
- "W": 62,
171
- "X": 63,
172
- "Y": 64,
173
- "Z": 65,
174
- "[": 66,
175
- "\\": 67,
176
- "]": 68,
177
- "^": 69,
178
- "_": 70,
179
- "`": 71,
180
- "a": 72,
181
- "b": 73,
182
- "c": 74,
183
- "d": 75,
184
- "e": 76,
185
- "f": 77,
186
- "g": 78,
187
- "h": 79,
188
- "i": 80,
189
- "j": 81,
190
- "k": 82,
191
- "l": 83,
192
- "m": 84,
193
- "n": 85,
194
- "o": 86,
195
- "p": 87,
196
- "q": 88,
197
- "r": 89,
198
- "s": 90,
199
- "t": 91,
200
- "u": 92,
201
- "v": 93,
202
- "w": 94,
203
- "x": 95,
204
- "y": 96,
205
- "z": 97,
206
- "{": 98,
207
- "|": 99,
208
- "}": 100,
209
- "~": 101,
210
- "¡": 102,
211
- "¢": 103,
212
- "£": 104,
213
- "¤": 105,
214
- "¥": 106,
215
- "¦": 107,
216
- "§": 108,
217
- "¨": 109,
218
- "©": 110,
219
- "ª": 111,
220
- "«": 112,
221
- "¬": 113,
222
- "®": 114,
223
- "¯": 115,
224
- "°": 116,
225
- "±": 117,
226
- "²": 118,
227
- "³": 119,
228
- "´": 120,
229
- "µ": 121,
230
- "": 122,
231
- "·": 123,
232
- "¸": 124,
233
- "¹": 125,
234
- "º": 126,
235
- "»": 127,
236
- "¼": 128,
237
- "½": 129,
238
- "¾": 130,
239
- "¿": 131,
240
- "À": 132,
241
- "Á": 133,
242
- "Â": 134,
243
- "Ã": 135,
244
- "Ä": 136,
245
- "Å": 137,
246
- "Æ": 138,
247
- "Ç": 139,
248
- "È": 140,
249
- "É": 141,
250
- "Ê": 142,
251
- "Ë": 143,
252
- "Ì": 144,
253
- "Í": 145,
254
- "Î": 146,
255
- "Ï": 147,
256
- "Ð": 148,
257
- "Ñ": 149,
258
- "Ò": 150,
259
- "Ó": 151,
260
- "Ô": 152,
261
- "Õ": 153,
262
- "Ö": 154,
263
- "×": 155,
264
- "Ø": 156,
265
- "Ù": 157,
266
- "Ú": 158,
267
- "Û": 159,
268
- "Ü": 160,
269
- "Ý": 161,
270
- "Þ": 162,
271
- "ß": 163,
272
- "à": 164,
273
- "á": 165,
274
- "â": 166,
275
- "ã": 167,
276
- "ä": 168,
277
- "å": 169,
278
- "æ": 170,
279
- "ç": 171,
280
- "è": 172,
281
- "é": 173,
282
- "ê": 174,
283
- "ë": 175,
284
- "ì": 176,
285
- "í": 177,
286
- "î": 178,
287
- "ï": 179,
288
- "ð": 180,
289
- "ñ": 181,
290
- "ò": 182,
291
- "ó": 183,
292
- "ô": 184,
293
- "õ": 185,
294
- "ö": 186,
295
- "÷": 187,
296
- "ø": 188,
297
- "ù": 189,
298
- "ú": 190,
299
- "û": 191,
300
- "ü": 192,
301
- "ý": 193,
302
- "þ": 194,
303
- "ÿ": 195,
304
- "Ā": 196,
305
- "ā": 197,
306
- "Ă": 198,
307
- "ă": 199,
308
- "Ą": 200,
309
- "ą": 201,
310
- "Ć": 202,
311
- "ć": 203,
312
- "Ĉ": 204,
313
- "ĉ": 205,
314
- "Ċ": 206,
315
- "ċ": 207,
316
- "Č": 208,
317
- "č": 209,
318
- "Ď": 210,
319
- "ď": 211,
320
- "Đ": 212,
321
- "đ": 213,
322
- "Ē": 214,
323
- "ē": 215,
324
- "Ĕ": 216,
325
- "ĕ": 217,
326
- "Ė": 218,
327
- "ė": 219,
328
- "Ę": 220,
329
- "ę": 221,
330
- "Ě": 222,
331
- "ě": 223,
332
- "Ĝ": 224,
333
- "ĝ": 225,
334
- "Ğ": 226,
335
- "ğ": 227,
336
- "Ġ": 228,
337
- "ġ": 229,
338
- "Ģ": 230,
339
- "ģ": 231,
340
- "Ĥ": 232,
341
- "ĥ": 233,
342
- "Ħ": 234,
343
- "ħ": 235,
344
- "Ĩ": 236,
345
- "ĩ": 237,
346
- "Ī": 238,
347
- "ī": 239,
348
- "Ĭ": 240,
349
- "ĭ": 241,
350
- "Į": 242,
351
- "į": 243,
352
- "İ": 244,
353
- "ı": 245,
354
- "IJ": 246,
355
- "ij": 247,
356
- "Ĵ": 248,
357
- "ĵ": 249,
358
- "Ķ": 250,
359
- "ķ": 251,
360
- "ĸ": 252,
361
- "Ĺ": 253,
362
- "ĺ": 254,
363
- "Ļ": 255,
364
- "ļ": 256,
365
- "Ľ": 257,
366
- "ľ": 258,
367
- "Ŀ": 259,
368
- "ŀ": 260,
369
- "Ł": 261,
370
- "ł": 262,
371
- "Ń": 263,
372
- "Ġn": 264,
373
- "Ġ|": 265,
374
- "ah": 266,
375
- "Ġah": 267,
376
- "Ġaha": 268,
377
- "Ġ1": 269,
378
- "Ġ4": 270,
379
- "Ġ3": 271,
380
- "Ġ2": 272,
381
- "ai": 273,
382
- "wai": 274,
383
- "Ġwai": 275,
384
- "Ġwait": 276,
385
- "10": 277,
386
- "Ġ[": 278,
387
- "Ġ]": 279,
388
- "Ġl": 280,
389
- "11": 281,
390
- "12": 282,
391
- "Ġ6": 283,
392
- "Ġ7": 284,
393
- "Bo": 285,
394
- "Eo": 286,
395
- "ĠEo": 287,
396
- "Ġ5": 288,
397
- "Ġ8": 289,
398
- "13": 290,
399
- "Ġ9": 291,
400
- "Ġ10": 292,
401
- "ĠBo": 293,
402
- "ĠEoS": 294,
403
- "ĠEoT": 295,
404
- "ĠBoT": 296,
405
- "14": 297,
406
- "Ġ11": 298,
407
- "Ġ12": 299,
408
- "15": 300,
409
- "Ġ13": 301,
410
- "Ġ14": 302,
411
- "16": 303,
412
- "Ġ15": 304,
413
- "Ġ16": 305,
414
- "17": 306,
415
- "Ġ17": 307,
416
- "Ġ18": 308,
417
- "Ġ19": 309,
418
- "Ġ20": 310
 
419
  },
420
  "merges": [
421
  [
 
59
  },
60
  {
61
  "id": 6,
62
+ "content": "PAD",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 7,
71
+ "content": "EoT",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 8,
80
  "content": "BoT",
81
  "single_word": false,
82
  "lstrip": false,
 
120
  "BoS": 3,
121
  "EoS": 4,
122
  "UNK": 5,
123
+ "PAD": 6,
124
+ "EoT": 7,
125
+ "BoT": 8,
126
+ "!": 9,
127
+ "\"": 10,
128
+ "#": 11,
129
+ "$": 12,
130
+ "%": 13,
131
+ "&": 14,
132
+ "'": 15,
133
+ "(": 16,
134
+ ")": 17,
135
+ "*": 18,
136
+ "+": 19,
137
+ ",": 20,
138
+ "-": 21,
139
+ ".": 22,
140
+ "/": 23,
141
+ "0": 24,
142
+ "1": 25,
143
+ "2": 26,
144
+ "3": 27,
145
+ "4": 28,
146
+ "5": 29,
147
+ "6": 30,
148
+ "7": 31,
149
+ "8": 32,
150
+ "9": 33,
151
+ ":": 34,
152
+ ";": 35,
153
+ "<": 36,
154
+ "=": 37,
155
+ ">": 38,
156
+ "?": 39,
157
+ "@": 40,
158
+ "A": 41,
159
+ "B": 42,
160
+ "C": 43,
161
+ "D": 44,
162
+ "E": 45,
163
+ "F": 46,
164
+ "G": 47,
165
+ "H": 48,
166
+ "I": 49,
167
+ "J": 50,
168
+ "K": 51,
169
+ "L": 52,
170
+ "M": 53,
171
+ "N": 54,
172
+ "O": 55,
173
+ "P": 56,
174
+ "Q": 57,
175
+ "R": 58,
176
+ "S": 59,
177
+ "T": 60,
178
+ "U": 61,
179
+ "V": 62,
180
+ "W": 63,
181
+ "X": 64,
182
+ "Y": 65,
183
+ "Z": 66,
184
+ "[": 67,
185
+ "\\": 68,
186
+ "]": 69,
187
+ "^": 70,
188
+ "_": 71,
189
+ "`": 72,
190
+ "a": 73,
191
+ "b": 74,
192
+ "c": 75,
193
+ "d": 76,
194
+ "e": 77,
195
+ "f": 78,
196
+ "g": 79,
197
+ "h": 80,
198
+ "i": 81,
199
+ "j": 82,
200
+ "k": 83,
201
+ "l": 84,
202
+ "m": 85,
203
+ "n": 86,
204
+ "o": 87,
205
+ "p": 88,
206
+ "q": 89,
207
+ "r": 90,
208
+ "s": 91,
209
+ "t": 92,
210
+ "u": 93,
211
+ "v": 94,
212
+ "w": 95,
213
+ "x": 96,
214
+ "y": 97,
215
+ "z": 98,
216
+ "{": 99,
217
+ "|": 100,
218
+ "}": 101,
219
+ "~": 102,
220
+ "¡": 103,
221
+ "¢": 104,
222
+ "£": 105,
223
+ "¤": 106,
224
+ "¥": 107,
225
+ "¦": 108,
226
+ "§": 109,
227
+ "¨": 110,
228
+ "©": 111,
229
+ "ª": 112,
230
+ "«": 113,
231
+ "¬": 114,
232
+ "®": 115,
233
+ "¯": 116,
234
+ "°": 117,
235
+ "±": 118,
236
+ "²": 119,
237
+ "³": 120,
238
+ "´": 121,
239
+ "µ": 122,
240
+ "": 123,
241
+ "·": 124,
242
+ "¸": 125,
243
+ "¹": 126,
244
+ "º": 127,
245
+ "»": 128,
246
+ "¼": 129,
247
+ "½": 130,
248
+ "¾": 131,
249
+ "¿": 132,
250
+ "À": 133,
251
+ "Á": 134,
252
+ "Â": 135,
253
+ "Ã": 136,
254
+ "Ä": 137,
255
+ "Å": 138,
256
+ "Æ": 139,
257
+ "Ç": 140,
258
+ "È": 141,
259
+ "É": 142,
260
+ "Ê": 143,
261
+ "Ë": 144,
262
+ "Ì": 145,
263
+ "Í": 146,
264
+ "Î": 147,
265
+ "Ï": 148,
266
+ "Ð": 149,
267
+ "Ñ": 150,
268
+ "Ò": 151,
269
+ "Ó": 152,
270
+ "Ô": 153,
271
+ "Õ": 154,
272
+ "Ö": 155,
273
+ "×": 156,
274
+ "Ø": 157,
275
+ "Ù": 158,
276
+ "Ú": 159,
277
+ "Û": 160,
278
+ "Ü": 161,
279
+ "Ý": 162,
280
+ "Þ": 163,
281
+ "ß": 164,
282
+ "à": 165,
283
+ "á": 166,
284
+ "â": 167,
285
+ "ã": 168,
286
+ "ä": 169,
287
+ "å": 170,
288
+ "æ": 171,
289
+ "ç": 172,
290
+ "è": 173,
291
+ "é": 174,
292
+ "ê": 175,
293
+ "ë": 176,
294
+ "ì": 177,
295
+ "í": 178,
296
+ "î": 179,
297
+ "ï": 180,
298
+ "ð": 181,
299
+ "ñ": 182,
300
+ "ò": 183,
301
+ "ó": 184,
302
+ "ô": 185,
303
+ "õ": 186,
304
+ "ö": 187,
305
+ "÷": 188,
306
+ "ø": 189,
307
+ "ù": 190,
308
+ "ú": 191,
309
+ "û": 192,
310
+ "ü": 193,
311
+ "ý": 194,
312
+ "þ": 195,
313
+ "ÿ": 196,
314
+ "Ā": 197,
315
+ "ā": 198,
316
+ "Ă": 199,
317
+ "ă": 200,
318
+ "Ą": 201,
319
+ "ą": 202,
320
+ "Ć": 203,
321
+ "ć": 204,
322
+ "Ĉ": 205,
323
+ "ĉ": 206,
324
+ "Ċ": 207,
325
+ "ċ": 208,
326
+ "Č": 209,
327
+ "č": 210,
328
+ "Ď": 211,
329
+ "ď": 212,
330
+ "Đ": 213,
331
+ "đ": 214,
332
+ "Ē": 215,
333
+ "ē": 216,
334
+ "Ĕ": 217,
335
+ "ĕ": 218,
336
+ "Ė": 219,
337
+ "ė": 220,
338
+ "Ę": 221,
339
+ "ę": 222,
340
+ "Ě": 223,
341
+ "ě": 224,
342
+ "Ĝ": 225,
343
+ "ĝ": 226,
344
+ "Ğ": 227,
345
+ "ğ": 228,
346
+ "Ġ": 229,
347
+ "ġ": 230,
348
+ "Ģ": 231,
349
+ "ģ": 232,
350
+ "Ĥ": 233,
351
+ "ĥ": 234,
352
+ "Ħ": 235,
353
+ "ħ": 236,
354
+ "Ĩ": 237,
355
+ "ĩ": 238,
356
+ "Ī": 239,
357
+ "ī": 240,
358
+ "Ĭ": 241,
359
+ "ĭ": 242,
360
+ "Į": 243,
361
+ "į": 244,
362
+ "İ": 245,
363
+ "ı": 246,
364
+ "IJ": 247,
365
+ "ij": 248,
366
+ "Ĵ": 249,
367
+ "ĵ": 250,
368
+ "Ķ": 251,
369
+ "ķ": 252,
370
+ "ĸ": 253,
371
+ "Ĺ": 254,
372
+ "ĺ": 255,
373
+ "Ļ": 256,
374
+ "ļ": 257,
375
+ "Ľ": 258,
376
+ "ľ": 259,
377
+ "Ŀ": 260,
378
+ "ŀ": 261,
379
+ "Ł": 262,
380
+ "ł": 263,
381
+ "Ń": 264,
382
+ "Ġn": 265,
383
+ "Ġ|": 266,
384
+ "ah": 267,
385
+ "Ġah": 268,
386
+ "Ġaha": 269,
387
+ "Ġ1": 270,
388
+ "Ġ4": 271,
389
+ "Ġ3": 272,
390
+ "Ġ2": 273,
391
+ "ai": 274,
392
+ "wai": 275,
393
+ "Ġwai": 276,
394
+ "Ġwait": 277,
395
+ "10": 278,
396
+ "Ġ[": 279,
397
+ "Ġ]": 280,
398
+ "Ġl": 281,
399
+ "11": 282,
400
+ "12": 283,
401
+ "Ġ6": 284,
402
+ "Ġ7": 285,
403
+ "Bo": 286,
404
+ "Eo": 287,
405
+ "ĠEo": 288,
406
+ "Ġ5": 289,
407
+ "Ġ8": 290,
408
+ "13": 291,
409
+ "Ġ9": 292,
410
+ "Ġ10": 293,
411
+ "ĠBo": 294,
412
+ "ĠEoS": 295,
413
+ "ĠEoT": 296,
414
+ "ĠBoT": 297,
415
+ "14": 298,
416
+ "Ġ11": 299,
417
+ "Ġ12": 300,
418
+ "15": 301,
419
+ "Ġ13": 302,
420
+ "Ġ14": 303,
421
+ "16": 304,
422
+ "Ġ15": 305,
423
+ "Ġ16": 306,
424
+ "17": 307,
425
+ "Ġ17": 308,
426
+ "Ġ18": 309,
427
+ "Ġ19": 310,
428
+ "Ġ20": 311
429
  },
430
  "merges": [
431
  [
tokenizer_config.json CHANGED
@@ -50,7 +50,7 @@
50
  "special": true
51
  },
52
  "6": {
53
- "content": "EoT",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
@@ -58,6 +58,14 @@
58
  "special": true
59
  },
60
  "7": {
 
 
 
 
 
 
 
 
61
  "content": "BoT",
62
  "lstrip": false,
63
  "normalized": false,
@@ -77,6 +85,7 @@
77
  "eos_token": "EoS",
78
  "extra_special_tokens": {},
79
  "model_max_length": 1024,
 
80
  "tokenizer_class": "GPT2Tokenizer",
81
  "unk_token": "UNK"
82
  }
 
50
  "special": true
51
  },
52
  "6": {
53
+ "content": "PAD",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
 
58
  "special": true
59
  },
60
  "7": {
61
+ "content": "EoT",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
  "content": "BoT",
70
  "lstrip": false,
71
  "normalized": false,
 
85
  "eos_token": "EoS",
86
  "extra_special_tokens": {},
87
  "model_max_length": 1024,
88
+ "pad_token": "PAD",
89
  "tokenizer_class": "GPT2Tokenizer",
90
  "unk_token": "UNK"
91
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"aha":0,"wait":1,"<|endoftext|>":2,"BoS":3,"EoS":4,"UNK":5,"EoT":6,"BoT":7,"!":8,"\"":9,"#":10,"$":11,"%":12,"&":13,"'":14,"(":15,")":16,"*":17,"+":18,",":19,"-":20,".":21,"/":22,"0":23,"1":24,"2":25,"3":26,"4":27,"5":28,"6":29,"7":30,"8":31,"9":32,":":33,";":34,"<":35,"=":36,">":37,"?":38,"@":39,"A":40,"B":41,"C":42,"D":43,"E":44,"F":45,"G":46,"H":47,"I":48,"J":49,"K":50,"L":51,"M":52,"N":53,"O":54,"P":55,"Q":56,"R":57,"S":58,"T":59,"U":60,"V":61,"W":62,"X":63,"Y":64,"Z":65,"[":66,"\\":67,"]":68,"^":69,"_":70,"`":71,"a":72,"b":73,"c":74,"d":75,"e":76,"f":77,"g":78,"h":79,"i":80,"j":81,"k":82,"l":83,"m":84,"n":85,"o":86,"p":87,"q":88,"r":89,"s":90,"t":91,"u":92,"v":93,"w":94,"x":95,"y":96,"z":97,"{":98,"|":99,"}":100,"~":101,"¡":102,"¢":103,"£":104,"¤":105,"¥":106,"¦":107,"§":108,"¨":109,"©":110,"ª":111,"«":112,"¬":113,"®":114,"¯":115,"°":116,"±":117,"²":118,"³":119,"´":120,"µ":121,"¶":122,"·":123,"¸":124,"¹":125,"º":126,"»":127,"¼":128,"½":129,"¾":130,"¿":131,"À":132,"Á":133,"Â":134,"Ã":135,"Ä":136,"Å":137,"Æ":138,"Ç":139,"È":140,"É":141,"Ê":142,"Ë":143,"Ì":144,"Í":145,"Î":146,"Ï":147,"Ð":148,"Ñ":149,"Ò":150,"Ó":151,"Ô":152,"Õ":153,"Ö":154,"×":155,"Ø":156,"Ù":157,"Ú":158,"Û":159,"Ü":160,"Ý":161,"Þ":162,"ß":163,"à":164,"á":165,"â":166,"ã":167,"ä":168,"å":169,"æ":170,"ç":171,"è":172,"é":173,"ê":174,"ë":175,"ì":176,"í":177,"î":178,"ï":179,"ð":180,"ñ":181,"ò":182,"ó":183,"ô":184,"õ":185,"ö":186,"÷":187,"ø":188,"ù":189,"ú":190,"û":191,"ü":192,"ý":193,"þ":194,"ÿ":195,"Ā":196,"ā":197,"Ă":198,"ă":199,"Ą":200,"ą":201,"Ć":202,"ć":203,"Ĉ":204,"ĉ":205,"Ċ":206,"ċ":207,"Č":208,"č":209,"Ď":210,"ď":211,"Đ":212,"đ":213,"Ē":214,"ē":215,"Ĕ":216,"ĕ":217,"Ė":218,"ė":219,"Ę":220,"ę":221,"Ě":222,"ě":223,"Ĝ":224,"ĝ":225,"Ğ":226,"ğ":227,"Ġ":228,"ġ":229,"Ģ":230,"ģ":231,"Ĥ":232,"ĥ":233,"Ħ":234,"ħ":235,"Ĩ":236,"ĩ":237,"Ī":238,"ī":239,"Ĭ":240,"ĭ":241,"Į":242,"į":243,"İ":244,"ı":245,"IJ":246,"ij":247,"Ĵ":248,"ĵ":249,"Ķ":250,"ķ":251,"ĸ":252,"Ĺ":253,"ĺ":254,"Ļ":255,"ļ":256,"Ľ":257,"ľ":258,"Ŀ":
259,"ŀ":260,"Ł":261,"ł":262,"Ń":263,"Ġn":264,"Ġ|":265,"ah":266,"Ġah":267,"Ġaha":268,"Ġ1":269,"Ġ4":270,"Ġ3":271,"Ġ2":272,"ai":273,"wai":274,"Ġwai":275,"Ġwait":276,"10":277,"Ġ[":278,"Ġ]":279,"Ġl":280,"11":281,"12":282,"Ġ6":283,"Ġ7":284,"Bo":285,"Eo":286,"ĠEo":287,"Ġ5":288,"Ġ8":289,"13":290,"Ġ9":291,"Ġ10":292,"ĠBo":293,"ĠEoS":294,"ĠEoT":295,"ĠBoT":296,"14":297,"Ġ11":298,"Ġ12":299,"15":300,"Ġ13":301,"Ġ14":302,"16":303,"Ġ15":304,"Ġ16":305,"17":306,"Ġ17":307,"Ġ18":308,"Ġ19":309,"Ġ20":310}
 
1
+ {"aha":0,"wait":1,"<|endoftext|>":2,"BoS":3,"EoS":4,"UNK":5,"PAD":6,"EoT":7,"BoT":8,"!":9,"\"":10,"#":11,"$":12,"%":13,"&":14,"'":15,"(":16,")":17,"*":18,"+":19,",":20,"-":21,".":22,"/":23,"0":24,"1":25,"2":26,"3":27,"4":28,"5":29,"6":30,"7":31,"8":32,"9":33,":":34,";":35,"<":36,"=":37,">":38,"?":39,"@":40,"A":41,"B":42,"C":43,"D":44,"E":45,"F":46,"G":47,"H":48,"I":49,"J":50,"K":51,"L":52,"M":53,"N":54,"O":55,"P":56,"Q":57,"R":58,"S":59,"T":60,"U":61,"V":62,"W":63,"X":64,"Y":65,"Z":66,"[":67,"\\":68,"]":69,"^":70,"_":71,"`":72,"a":73,"b":74,"c":75,"d":76,"e":77,"f":78,"g":79,"h":80,"i":81,"j":82,"k":83,"l":84,"m":85,"n":86,"o":87,"p":88,"q":89,"r":90,"s":91,"t":92,"u":93,"v":94,"w":95,"x":96,"y":97,"z":98,"{":99,"|":100,"}":101,"~":102,"¡":103,"¢":104,"£":105,"¤":106,"¥":107,"¦":108,"§":109,"¨":110,"©":111,"ª":112,"«":113,"¬":114,"®":115,"¯":116,"°":117,"±":118,"²":119,"³":120,"´":121,"µ":122,"¶":123,"·":124,"¸":125,"¹":126,"º":127,"»":128,"¼":129,"½":130,"¾":131,"¿":132,"À":133,"Á":134,"Â":135,"Ã":136,"Ä":137,"Å":138,"Æ":139,"Ç":140,"È":141,"É":142,"Ê":143,"Ë":144,"Ì":145,"Í":146,"Î":147,"Ï":148,"Ð":149,"Ñ":150,"Ò":151,"Ó":152,"Ô":153,"Õ":154,"Ö":155,"×":156,"Ø":157,"Ù":158,"Ú":159,"Û":160,"Ü":161,"Ý":162,"Þ":163,"ß":164,"à":165,"á":166,"â":167,"ã":168,"ä":169,"å":170,"æ":171,"ç":172,"è":173,"é":174,"ê":175,"ë":176,"ì":177,"í":178,"î":179,"ï":180,"ð":181,"ñ":182,"ò":183,"ó":184,"ô":185,"õ":186,"ö":187,"÷":188,"ø":189,"ù":190,"ú":191,"û":192,"ü":193,"ý":194,"þ":195,"ÿ":196,"Ā":197,"ā":198,"Ă":199,"ă":200,"Ą":201,"ą":202,"Ć":203,"ć":204,"Ĉ":205,"ĉ":206,"Ċ":207,"ċ":208,"Č":209,"č":210,"Ď":211,"ď":212,"Đ":213,"đ":214,"Ē":215,"ē":216,"Ĕ":217,"ĕ":218,"Ė":219,"ė":220,"Ę":221,"ę":222,"Ě":223,"ě":224,"Ĝ":225,"ĝ":226,"Ğ":227,"ğ":228,"Ġ":229,"ġ":230,"Ģ":231,"ģ":232,"Ĥ":233,"ĥ":234,"Ħ":235,"ħ":236,"Ĩ":237,"ĩ":238,"Ī":239,"ī":240,"Ĭ":241,"ĭ":242,"Į":243,"į":244,"İ":245,"ı":246,"IJ":247,"ij":248,"Ĵ":249,"ĵ":250,"Ķ":251,"ķ":252,"ĸ":253,"Ĺ":254,"ĺ":255,"Ļ":256,"ļ":257,"Ľ":258,"ľ
":259,"Ŀ":260,"ŀ":261,"Ł":262,"ł":263,"Ń":264,"Ġn":265,"Ġ|":266,"ah":267,"Ġah":268,"Ġaha":269,"Ġ1":270,"Ġ4":271,"Ġ3":272,"Ġ2":273,"ai":274,"wai":275,"Ġwai":276,"Ġwait":277,"10":278,"Ġ[":279,"Ġ]":280,"Ġl":281,"11":282,"12":283,"Ġ6":284,"Ġ7":285,"Bo":286,"Eo":287,"ĠEo":288,"Ġ5":289,"Ġ8":290,"13":291,"Ġ9":292,"Ġ10":293,"ĠBo":294,"ĠEoS":295,"ĠEoT":296,"ĠBoT":297,"14":298,"Ġ11":299,"Ġ12":300,"15":301,"Ġ13":302,"Ġ14":303,"16":304,"Ġ15":305,"Ġ16":306,"17":307,"Ġ17":308,"Ġ18":309,"Ġ19":310,"Ġ20":311}