windowsartes commited on
Commit
12d95fc
·
verified ·
1 Parent(s): 99adeb8

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +37 -0
  2. tokenizer.json +1061 -0
  3. tokenizer_config.json +59 -0
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
@@ -0,0 +1,1061 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[MASK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[CLS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[SEP]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[UNK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "BertNormalizer",
54
+ "clean_text": true,
55
+ "handle_chinese_chars": true,
56
+ "strip_accents": null,
57
+ "lowercase": false
58
+ },
59
+ "pre_tokenizer": {
60
+ "type": "BertPreTokenizer"
61
+ },
62
+ "post_processor": {
63
+ "type": "TemplateProcessing",
64
+ "single": [
65
+ {
66
+ "SpecialToken": {
67
+ "id": "[CLS]",
68
+ "type_id": 0
69
+ }
70
+ },
71
+ {
72
+ "Sequence": {
73
+ "id": "A",
74
+ "type_id": 0
75
+ }
76
+ },
77
+ {
78
+ "SpecialToken": {
79
+ "id": "[SEP]",
80
+ "type_id": 0
81
+ }
82
+ }
83
+ ],
84
+ "pair": [
85
+ {
86
+ "SpecialToken": {
87
+ "id": "[CLS]",
88
+ "type_id": 0
89
+ }
90
+ },
91
+ {
92
+ "Sequence": {
93
+ "id": "A",
94
+ "type_id": 0
95
+ }
96
+ },
97
+ {
98
+ "SpecialToken": {
99
+ "id": "[SEP]",
100
+ "type_id": 0
101
+ }
102
+ },
103
+ {
104
+ "Sequence": {
105
+ "id": "B",
106
+ "type_id": 1
107
+ }
108
+ },
109
+ {
110
+ "SpecialToken": {
111
+ "id": "[SEP]",
112
+ "type_id": 1
113
+ }
114
+ }
115
+ ],
116
+ "special_tokens": {
117
+ "[CLS]": {
118
+ "id": "[CLS]",
119
+ "ids": [
120
+ 2
121
+ ],
122
+ "tokens": [
123
+ "[CLS]"
124
+ ]
125
+ },
126
+ "[SEP]": {
127
+ "id": "[SEP]",
128
+ "ids": [
129
+ 3
130
+ ],
131
+ "tokens": [
132
+ "[SEP]"
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "decoder": {
138
+ "type": "WordPiece",
139
+ "prefix": "##",
140
+ "cleanup": true
141
+ },
142
+ "model": {
143
+ "type": "WordPiece",
144
+ "unk_token": "[UNK]",
145
+ "continuing_subword_prefix": "##",
146
+ "max_input_chars_per_word": 100,
147
+ "vocab": {
148
+ "[MASK]": 0,
149
+ "[PAD]": 1,
150
+ "[CLS]": 2,
151
+ "[SEP]": 3,
152
+ "[UNK]": 4,
153
+ "G31": 5,
154
+ "F02": 6,
155
+ "E78": 7,
156
+ "G20": 8,
157
+ "K31": 9,
158
+ "K21": 10,
159
+ "K44": 11,
160
+ "F41": 12,
161
+ "I34": 13,
162
+ "M81": 14,
163
+ "G43": 15,
164
+ "I13": 16,
165
+ "I50": 17,
166
+ "E87": 18,
167
+ "E11": 19,
168
+ "I25": 20,
169
+ "D64": 21,
170
+ "G47": 22,
171
+ "M1A": 23,
172
+ "E66": 24,
173
+ "D50": 25,
174
+ "K29": 26,
175
+ "K25": 27,
176
+ "K57": 28,
177
+ "I12": 29,
178
+ "I70": 30,
179
+ "K22": 31,
180
+ "D63": 32,
181
+ "C67": 33,
182
+ "I10": 34,
183
+ "D25": 35,
184
+ "E89": 36,
185
+ "I26": 37,
186
+ "I82": 38,
187
+ "I47": 39,
188
+ "I87": 40,
189
+ "B95": 41,
190
+ "E03": 42,
191
+ "E83": 43,
192
+ "K59": 44,
193
+ "K65": 45,
194
+ "B96": 46,
195
+ "F17": 47,
196
+ "D72": 48,
197
+ "A41": 49,
198
+ "D62": 50,
199
+ "I27": 51,
200
+ "K43": 52,
201
+ "E43": 53,
202
+ "E53": 54,
203
+ "I48": 55,
204
+ "I11": 56,
205
+ "I45": 57,
206
+ "J44": 58,
207
+ "G56": 59,
208
+ "M19": 60,
209
+ "J45": 61,
210
+ "I73": 62,
211
+ "K72": 63,
212
+ "J96": 64,
213
+ "I44": 65,
214
+ "D69": 66,
215
+ "I46": 67,
216
+ "M47": 68,
217
+ "J01": 69,
218
+ "M79": 70,
219
+ "K92": 71,
220
+ "H40": 72,
221
+ "H69": 73,
222
+ "I24": 74,
223
+ "I49": 75,
224
+ "C16": 76,
225
+ "C78": 77,
226
+ "K91": 78,
227
+ "M86": 79,
228
+ "L97": 80,
229
+ "F32": 81,
230
+ "G25": 82,
231
+ "L03": 83,
232
+ "J98": 84,
233
+ "I96": 85,
234
+ "G89": 86,
235
+ "M25": 87,
236
+ "E46": 88,
237
+ "G30": 89,
238
+ "H91": 90,
239
+ "M17": 91,
240
+ "D68": 92,
241
+ "M46": 93,
242
+ "M70": 94,
243
+ "E55": 95,
244
+ "C50": 96,
245
+ "I83": 97,
246
+ "L76": 98,
247
+ "K76": 99,
248
+ "I67": 100,
249
+ "M51": 101,
250
+ "M48": 102,
251
+ "M35": 103,
252
+ "C79": 104,
253
+ "G93": 105,
254
+ "G91": 106,
255
+ "C34": 107,
256
+ "F05": 108,
257
+ "G51": 109,
258
+ "M21": 110,
259
+ "H53": 111,
260
+ "H04": 112,
261
+ "H35": 113,
262
+ "M50": 114,
263
+ "E04": 115,
264
+ "M80": 116,
265
+ "F03": 117,
266
+ "I28": 118,
267
+ "J43": 119,
268
+ "I07": 120,
269
+ "C43": 121,
270
+ "I65": 122,
271
+ "J18": 123,
272
+ "J90": 124,
273
+ "K40": 125,
274
+ "I30": 126,
275
+ "I31": 127,
276
+ "M06": 128,
277
+ "I21": 129,
278
+ "A04": 130,
279
+ "F44": 131,
280
+ "I95": 132,
281
+ "E86": 133,
282
+ "D51": 134,
283
+ "F43": 135,
284
+ "F50": 136,
285
+ "K02": 137,
286
+ "L73": 138,
287
+ "K85": 139,
288
+ "F39": 140,
289
+ "M54": 141,
290
+ "B20": 142,
291
+ "B19": 143,
292
+ "F10": 144,
293
+ "E16": 145,
294
+ "F19": 146,
295
+ "F14": 147,
296
+ "F20": 148,
297
+ "F29": 149,
298
+ "F11": 150,
299
+ "E10": 151,
300
+ "D86": 152,
301
+ "J02": 153,
302
+ "K20": 154,
303
+ "E21": 155,
304
+ "E51": 156,
305
+ "I69": 157,
306
+ "I63": 158,
307
+ "G81": 159,
308
+ "M85": 160,
309
+ "D49": 161,
310
+ "K74": 162,
311
+ "K66": 163,
312
+ "K62": 164,
313
+ "L91": 165,
314
+ "D57": 166,
315
+ "C90": 167,
316
+ "J91": 168,
317
+ "K56": 169,
318
+ "C21": 170,
319
+ "B37": 171,
320
+ "E88": 172,
321
+ "I35": 173,
322
+ "I42": 174,
323
+ "K61": 175,
324
+ "K60": 176,
325
+ "G92": 177,
326
+ "D46": 178,
327
+ "J84": 179,
328
+ "M84": 180,
329
+ "D18": 181,
330
+ "I61": 182,
331
+ "G40": 183,
332
+ "L02": 184,
333
+ "F88": 185,
334
+ "I08": 186,
335
+ "G62": 187,
336
+ "F31": 188,
337
+ "E44": 189,
338
+ "I15": 190,
339
+ "K80": 191,
340
+ "I16": 192,
341
+ "J69": 193,
342
+ "J95": 194,
343
+ "H26": 195,
344
+ "L40": 196,
345
+ "L57": 197,
346
+ "M72": 198,
347
+ "J30": 199,
348
+ "H61": 200,
349
+ "C56": 201,
350
+ "C77": 202,
351
+ "I81": 203,
352
+ "I85": 204,
353
+ "I86": 205,
354
+ "K70": 206,
355
+ "G96": 207,
356
+ "F12": 208,
357
+ "M53": 209,
358
+ "C7B": 210,
359
+ "C7A": 211,
360
+ "K58": 212,
361
+ "K26": 213,
362
+ "J81": 214,
363
+ "B18": 215,
364
+ "A08": 216,
365
+ "M96": 217,
366
+ "I97": 218,
367
+ "C22": 219,
368
+ "C25": 220,
369
+ "K86": 221,
370
+ "K52": 222,
371
+ "K11": 223,
372
+ "B00": 224,
373
+ "A40": 225,
374
+ "K83": 226,
375
+ "C24": 227,
376
+ "D53": 228,
377
+ "C20": 229,
378
+ "H90": 230,
379
+ "D47": 231,
380
+ "L29": 232,
381
+ "K81": 233,
382
+ "K82": 234,
383
+ "D70": 235,
384
+ "M10": 236,
385
+ "F90": 237,
386
+ "D13": 238,
387
+ "I71": 239,
388
+ "F33": 240,
389
+ "K75": 241,
390
+ "F22": 242,
391
+ "F15": 243,
392
+ "F72": 244,
393
+ "F13": 245,
394
+ "J36": 246,
395
+ "L27": 247,
396
+ "D61": 248,
397
+ "C92": 249,
398
+ "E85": 250,
399
+ "K12": 251,
400
+ "L99": 252,
401
+ "L72": 253,
402
+ "G58": 254,
403
+ "F45": 255,
404
+ "H66": 256,
405
+ "J11": 257,
406
+ "F60": 258,
407
+ "L30": 259,
408
+ "J86": 260,
409
+ "J15": 261,
410
+ "A81": 262,
411
+ "M40": 263,
412
+ "L08": 264,
413
+ "L60": 265,
414
+ "L82": 266,
415
+ "M62": 267,
416
+ "H93": 268,
417
+ "I72": 269,
418
+ "F42": 270,
419
+ "K04": 271,
420
+ "E80": 272,
421
+ "L53": 273,
422
+ "L89": 274,
423
+ "L98": 275,
424
+ "H81": 276,
425
+ "D80": 277,
426
+ "B35": 278,
427
+ "C73": 279,
428
+ "E05": 280,
429
+ "J39": 281,
430
+ "I22": 282,
431
+ "I51": 283,
432
+ "C18": 284,
433
+ "C91": 285,
434
+ "C62": 286,
435
+ "K64": 287,
436
+ "K63": 288,
437
+ "E08": 289,
438
+ "F09": 290,
439
+ "K28": 291,
440
+ "E22": 292,
441
+ "F25": 293,
442
+ "M94": 294,
443
+ "M71": 295,
444
+ "J20": 296,
445
+ "K95": 297,
446
+ "M16": 298,
447
+ "L85": 299,
448
+ "B34": 300,
449
+ "J04": 301,
450
+ "J47": 302,
451
+ "M31": 303,
452
+ "M41": 304,
453
+ "L71": 305,
454
+ "M00": 306,
455
+ "M89": 307,
456
+ "J94": 308,
457
+ "C61": 309,
458
+ "C80": 310,
459
+ "J93": 311,
460
+ "K50": 312,
461
+ "B02": 313,
462
+ "J06": 314,
463
+ "I09": 315,
464
+ "D32": 316,
465
+ "I33": 317,
466
+ "L28": 318,
467
+ "F23": 319,
468
+ "G12": 320,
469
+ "E02": 321,
470
+ "I60": 322,
471
+ "L95": 323,
472
+ "H33": 324,
473
+ "K08": 325,
474
+ "D73": 326,
475
+ "I20": 327,
476
+ "D59": 328,
477
+ "E79": 329,
478
+ "J60": 330,
479
+ "C95": 331,
480
+ "C94": 332,
481
+ "F84": 333,
482
+ "L05": 334,
483
+ "M30": 335,
484
+ "J32": 336,
485
+ "D23": 337,
486
+ "K42": 338,
487
+ "D35": 339,
488
+ "J34": 340,
489
+ "E26": 341,
490
+ "I80": 342,
491
+ "K51": 343,
492
+ "I36": 344,
493
+ "K55": 345,
494
+ "E27": 346,
495
+ "D75": 347,
496
+ "D55": 348,
497
+ "J40": 349,
498
+ "G90": 350,
499
+ "D12": 351,
500
+ "E56": 352,
501
+ "F51": 353,
502
+ "G35": 354,
503
+ "M87": 355,
504
+ "E06": 356,
505
+ "I43": 357,
506
+ "G44": 358,
507
+ "G95": 359,
508
+ "C02": 360,
509
+ "G83": 361,
510
+ "F79": 362,
511
+ "I77": 363,
512
+ "E73": 364,
513
+ "H05": 365,
514
+ "H54": 366,
515
+ "H46": 367,
516
+ "M08": 368,
517
+ "M11": 369,
518
+ "H11": 370,
519
+ "F28": 371,
520
+ "M97": 372,
521
+ "C60": 373,
522
+ "M99": 374,
523
+ "M34": 375,
524
+ "F01": 376,
525
+ "I66": 377,
526
+ "H43": 378,
527
+ "H25": 379,
528
+ "D89": 380,
529
+ "G60": 381,
530
+ "I89": 382,
531
+ "M20": 383,
532
+ "E13": 384,
533
+ "L93": 385,
534
+ "E61": 386,
535
+ "A09": 387,
536
+ "G24": 388,
537
+ "G21": 389,
538
+ "M75": 390,
539
+ "M05": 391,
540
+ "D60": 392,
541
+ "B44": 393,
542
+ "B59": 394,
543
+ "A49": 395,
544
+ "A18": 396,
545
+ "M43": 397,
546
+ "M23": 398,
547
+ "K94": 399,
548
+ "C81": 400,
549
+ "K35": 401,
550
+ "F07": 402,
551
+ "G82": 403,
552
+ "E74": 404,
553
+ "C45": 405,
554
+ "E59": 406,
555
+ "E60": 407,
556
+ "A48": 408,
557
+ "G45": 409,
558
+ "F30": 410,
559
+ "H34": 411,
560
+ "C85": 412,
561
+ "M32": 413,
562
+ "I05": 414,
563
+ "D28": 415,
564
+ "M24": 416,
565
+ "E23": 417,
566
+ "C71": 418,
567
+ "F40": 419,
568
+ "D38": 420,
569
+ "D48": 421,
570
+ "C64": 422,
571
+ "H10": 423,
572
+ "D65": 424,
573
+ "B49": 425,
574
+ "H16": 426,
575
+ "I99": 427,
576
+ "L70": 428,
577
+ "C48": 429,
578
+ "D43": 430,
579
+ "E07": 431,
580
+ "L10": 432,
581
+ "C83": 433,
582
+ "J70": 434,
583
+ "I68": 435,
584
+ "J99": 436,
585
+ "H15": 437,
586
+ "B36": 438,
587
+ "D58": 439,
588
+ "L23": 440,
589
+ "I23": 441,
590
+ "J09": 442,
591
+ "B94": 443,
592
+ "I74": 444,
593
+ "G54": 445,
594
+ "B99": 446,
595
+ "G72": 447,
596
+ "G50": 448,
597
+ "G97": 449,
598
+ "D17": 450,
599
+ "E09": 451,
600
+ "L13": 452,
601
+ "G61": 453,
602
+ "A43": 454,
603
+ "H02": 455,
604
+ "H57": 456,
605
+ "H49": 457,
606
+ "I38": 458,
607
+ "G37": 459,
608
+ "F21": 460,
609
+ "D56": 461,
610
+ "K90": 462,
611
+ "J92": 463,
612
+ "J12": 464,
613
+ "B97": 465,
614
+ "J38": 466,
615
+ "L80": 467,
616
+ "H31": 468,
617
+ "A42": 469,
618
+ "M13": 470,
619
+ "M77": 471,
620
+ "M65": 472,
621
+ "L88": 473,
622
+ "C44": 474,
623
+ "G06": 475,
624
+ "L25": 476,
625
+ "D34": 477,
626
+ "K46": 478,
627
+ "M60": 479,
628
+ "I62": 480,
629
+ "D83": 481,
630
+ "F64": 482,
631
+ "D41": 483,
632
+ "E72": 484,
633
+ "B60": 485,
634
+ "J61": 486,
635
+ "J85": 487,
636
+ "C10": 488,
637
+ "A63": 489,
638
+ "C14": 490,
639
+ "C09": 491,
640
+ "F55": 492,
641
+ "F99": 493,
642
+ "M76": 494,
643
+ "A60": 495,
644
+ "C54": 496,
645
+ "F91": 497,
646
+ "F63": 498,
647
+ "J14": 499,
648
+ "F06": 500,
649
+ "C88": 501,
650
+ "H83": 502,
651
+ "J31": 503,
652
+ "I37": 504,
653
+ "K68": 505,
654
+ "B45": 506,
655
+ "G04": 507,
656
+ "J10": 508,
657
+ "B85": 509,
658
+ "C46": 510,
659
+ "B48": 511,
660
+ "C40": 512,
661
+ "J80": 513,
662
+ "H51": 514,
663
+ "G46": 515,
664
+ "M15": 516,
665
+ "K06": 517,
666
+ "A69": 518,
667
+ "J13": 519,
668
+ "I78": 520,
669
+ "J00": 521,
670
+ "M27": 522,
671
+ "G08": 523,
672
+ "H92": 524,
673
+ "E63": 525,
674
+ "B33": 526,
675
+ "C19": 527,
676
+ "E34": 528,
677
+ "L50": 529,
678
+ "I06": 530,
679
+ "K71": 531,
680
+ "K36": 532,
681
+ "K14": 533,
682
+ "H18": 534,
683
+ "E29": 535,
684
+ "E54": 536,
685
+ "D30": 537,
686
+ "E28": 538,
687
+ "K30": 539,
688
+ "B86": 540,
689
+ "G57": 541,
690
+ "H21": 542,
691
+ "H44": 543,
692
+ "H20": 544,
693
+ "C15": 545,
694
+ "G80": 546,
695
+ "D36": 547,
696
+ "F34": 548,
697
+ "D27": 549,
698
+ "C51": 550,
699
+ "L90": 551,
700
+ "D52": 552,
701
+ "H55": 553,
702
+ "J62": 554,
703
+ "C93": 555,
704
+ "C57": 556,
705
+ "D3A": 557,
706
+ "A03": 558,
707
+ "E84": 559,
708
+ "D45": 560,
709
+ "K03": 561,
710
+ "A31": 562,
711
+ "B27": 563,
712
+ "L81": 564,
713
+ "G63": 565,
714
+ "D67": 566,
715
+ "F16": 567,
716
+ "K41": 568,
717
+ "K05": 569,
718
+ "G23": 570,
719
+ "C41": 571,
720
+ "K73": 572,
721
+ "J33": 573,
722
+ "L21": 574,
723
+ "D76": 575,
724
+ "E31": 576,
725
+ "C82": 577,
726
+ "B25": 578,
727
+ "H30": 579,
728
+ "A07": 580,
729
+ "B17": 581,
730
+ "L64": 582,
731
+ "K27": 583,
732
+ "L24": 584,
733
+ "M45": 585,
734
+ "F48": 586,
735
+ "A77": 587,
736
+ "C49": 588,
737
+ "G71": 589,
738
+ "A15": 590,
739
+ "L54": 591,
740
+ "C23": 592,
741
+ "G99": 593,
742
+ "H60": 594,
743
+ "M12": 595,
744
+ "F81": 596,
745
+ "M90": 597,
746
+ "H36": 598,
747
+ "D15": 599,
748
+ "C68": 600,
749
+ "L74": 601,
750
+ "C65": 602,
751
+ "A53": 603,
752
+ "M95": 604,
753
+ "E65": 605,
754
+ "C4A": 606,
755
+ "H42": 607,
756
+ "H50": 608,
757
+ "M88": 609,
758
+ "H72": 610,
759
+ "L94": 611,
760
+ "A19": 612,
761
+ "C76": 613,
762
+ "L55": 614,
763
+ "D71": 615,
764
+ "D40": 616,
765
+ "G70": 617,
766
+ "G52": 618,
767
+ "L43": 619,
768
+ "E24": 620,
769
+ "C52": 621,
770
+ "L20": 622,
771
+ "B82": 623,
772
+ "C37": 624,
773
+ "D05": 625,
774
+ "J42": 626,
775
+ "C53": 627,
776
+ "J82": 628,
777
+ "H70": 629,
778
+ "C74": 630,
779
+ "D22": 631,
780
+ "B15": 632,
781
+ "D02": 633,
782
+ "B30": 634,
783
+ "C17": 635,
784
+ "I76": 636,
785
+ "B91": 637,
786
+ "C84": 638,
787
+ "B78": 639,
788
+ "L01": 640,
789
+ "E20": 641,
790
+ "M92": 642,
791
+ "J67": 643,
792
+ "J05": 644,
793
+ "K13": 645,
794
+ "A46": 646,
795
+ "G03": 647,
796
+ "B09": 648,
797
+ "F95": 649,
798
+ "C26": 650,
799
+ "F53": 651,
800
+ "C06": 652,
801
+ "F18": 653,
802
+ "C72": 654,
803
+ "D33": 655,
804
+ "A87": 656,
805
+ "H47": 657,
806
+ "I32": 658,
807
+ "G59": 659,
808
+ "B16": 660,
809
+ "K37": 661,
810
+ "H01": 662,
811
+ "B01": 663,
812
+ "G98": 664,
813
+ "D44": 665,
814
+ "E50": 666,
815
+ "H80": 667,
816
+ "G36": 668,
817
+ "D74": 669,
818
+ "M66": 670,
819
+ "C86": 671,
820
+ "C32": 672,
821
+ "C66": 673,
822
+ "L92": 674,
823
+ "G65": 675,
824
+ "D26": 676,
825
+ "F70": 677,
826
+ "M26": 678,
827
+ "C33": 679,
828
+ "K01": 680,
829
+ "M18": 681,
830
+ "D00": 682,
831
+ "B46": 683,
832
+ "D37": 684,
833
+ "A02": 685,
834
+ "C07": 686,
835
+ "F71": 687,
836
+ "G10": 688,
837
+ "H27": 689,
838
+ "L63": 690,
839
+ "K38": 691,
840
+ "C04": 692,
841
+ "C08": 693,
842
+ "J16": 694,
843
+ "H52": 695,
844
+ "L65": 696,
845
+ "L84": 697,
846
+ "D16": 698,
847
+ "J03": 699,
848
+ "L52": 700,
849
+ "G00": 701,
850
+ "I75": 702,
851
+ "H00": 703,
852
+ "L11": 704,
853
+ "E01": 705,
854
+ "H59": 706,
855
+ "M22": 707,
856
+ "C96": 708,
857
+ "B07": 709,
858
+ "D84": 710,
859
+ "E32": 711,
860
+ "A54": 712,
861
+ "A56": 713,
862
+ "D39": 714,
863
+ "E70": 715,
864
+ "M14": 716,
865
+ "A52": 717,
866
+ "F54": 718,
867
+ "F98": 719,
868
+ "F68": 720,
869
+ "A86": 721,
870
+ "A64": 722,
871
+ "M61": 723,
872
+ "I88": 724,
873
+ "D29": 725,
874
+ "J22": 726,
875
+ "F80": 727,
876
+ "D24": 728,
877
+ "B53": 729,
878
+ "J66": 730,
879
+ "F78": 731,
880
+ "E75": 732,
881
+ "M83": 733,
882
+ "B90": 734,
883
+ "A44": 735,
884
+ "E41": 736,
885
+ "M33": 737,
886
+ "L59": 738,
887
+ "M67": 739,
888
+ "D04": 740,
889
+ "D09": 741,
890
+ "K45": 742,
891
+ "D07": 743,
892
+ "D01": 744,
893
+ "M93": 745,
894
+ "D78": 746,
895
+ "L12": 747,
896
+ "I40": 748,
897
+ "J68": 749,
898
+ "G11": 750,
899
+ "L68": 751,
900
+ "M07": 752,
901
+ "A36": 753,
902
+ "F82": 754,
903
+ "C05": 755,
904
+ "J35": 756,
905
+ "F89": 757,
906
+ "J21": 758,
907
+ "E71": 759,
908
+ "G14": 760,
909
+ "C70": 761,
910
+ "B10": 762,
911
+ "C58": 763,
912
+ "C01": 764,
913
+ "L44": 765,
914
+ "C55": 766,
915
+ "G13": 767,
916
+ "C13": 768,
917
+ "A28": 769,
918
+ "D66": 770,
919
+ "M36": 771,
920
+ "B88": 772,
921
+ "B47": 773,
922
+ "A74": 774,
923
+ "B58": 775,
924
+ "I00": 776,
925
+ "A85": 777,
926
+ "L56": 778,
927
+ "F04": 779,
928
+ "G94": 780,
929
+ "A39": 781,
930
+ "D21": 782,
931
+ "B57": 783,
932
+ "B69": 784,
933
+ "L04": 785,
934
+ "L51": 786,
935
+ "M02": 787,
936
+ "J17": 788,
937
+ "D82": 789,
938
+ "C47": 790,
939
+ "K00": 791,
940
+ "H73": 792,
941
+ "J63": 793,
942
+ "A51": 794,
943
+ "F65": 795,
944
+ "F69": 796,
945
+ "F94": 797,
946
+ "J41": 798,
947
+ "D31": 799,
948
+ "B67": 800,
949
+ "M01": 801,
950
+ "G73": 802,
951
+ "D19": 803,
952
+ "C12": 804,
953
+ "D11": 805,
954
+ "A93": 806,
955
+ "B65": 807,
956
+ "L86": 808,
957
+ "B87": 809,
958
+ "C75": 810,
959
+ "C03": 811,
960
+ "E00": 812,
961
+ "H71": 813,
962
+ "C31": 814,
963
+ "G32": 815,
964
+ "L83": 816,
965
+ "H74": 817,
966
+ "A05": 818,
967
+ "B50": 819,
968
+ "D03": 820,
969
+ "G09": 821,
970
+ "B54": 822,
971
+ "E25": 823,
972
+ "D06": 824,
973
+ "C11": 825,
974
+ "A21": 826,
975
+ "M04": 827,
976
+ "B08": 828,
977
+ "D14": 829,
978
+ "A59": 830,
979
+ "D10": 831,
980
+ "A75": 832,
981
+ "A32": 833,
982
+ "C00": 834,
983
+ "M63": 835,
984
+ "I39": 836,
985
+ "B83": 837,
986
+ "B26": 838,
987
+ "E64": 839,
988
+ "G53": 840,
989
+ "A90": 841,
990
+ "E77": 842,
991
+ "A70": 843,
992
+ "G55": 844,
993
+ "C38": 845,
994
+ "A84": 846,
995
+ "E52": 847,
996
+ "F73": 848,
997
+ "I01": 849,
998
+ "A22": 850,
999
+ "H95": 851,
1000
+ "F52": 852,
1001
+ "G64": 853,
1002
+ "J37": 854,
1003
+ "A37": 855,
1004
+ "D81": 856,
1005
+ "E58": 857,
1006
+ "G05": 858,
1007
+ "H65": 859,
1008
+ "A01": 860,
1009
+ "A26": 861,
1010
+ "E67": 862,
1011
+ "B38": 863,
1012
+ "L87": 864,
1013
+ "M42": 865,
1014
+ "H62": 866,
1015
+ "L75": 867,
1016
+ "A35": 868,
1017
+ "L22": 869,
1018
+ "C63": 870,
1019
+ "A79": 871,
1020
+ "K87": 872,
1021
+ "M91": 873,
1022
+ "L66": 874,
1023
+ "B43": 875,
1024
+ "B40": 876,
1025
+ "B42": 877,
1026
+ "L42": 878,
1027
+ "A24": 879,
1028
+ "K77": 880,
1029
+ "G26": 881,
1030
+ "E68": 882,
1031
+ "C69": 883,
1032
+ "F59": 884,
1033
+ "H75": 885,
1034
+ "H94": 886,
1035
+ "H67": 887,
1036
+ "L26": 888,
1037
+ "C30": 889,
1038
+ "H68": 890,
1039
+ "A55": 891,
1040
+ "A57": 892,
1041
+ "J64": 893,
1042
+ "A50": 894,
1043
+ "B76": 895,
1044
+ "H17": 896,
1045
+ "A06": 897,
1046
+ "E76": 898,
1047
+ "A17": 899,
1048
+ "L41": 900,
1049
+ "B39": 901,
1050
+ "D42": 902,
1051
+ "A92": 903,
1052
+ "A83": 904,
1053
+ "D20": 905,
1054
+ "L58": 906,
1055
+ "B56": 907,
1056
+ "A27": 908,
1057
+ "A80": 909,
1058
+ "A68": 910
1059
+ }
1060
+ }
1061
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "keep_accents": true,
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "remove_space": true,
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": null,
56
+ "tokenize_chinese_chars": true,
57
+ "tokenizer_class": "FNetTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }