Clemylia committed on
Commit
aab0028
·
verified ·
1 Parent(s): 712bf30

Ajout du tokenizer associé au modèle final

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -0
  2. tokenizer.json +907 -0
  3. tokenizer_config.json +49 -0
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pad_token": {
3
+ "content": "[PAD]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ }
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[CLS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Whitespace"
55
+ },
56
+ "post_processor": null,
57
+ "decoder": null,
58
+ "model": {
59
+ "type": "BPE",
60
+ "dropout": null,
61
+ "unk_token": "[UNK]",
62
+ "continuing_subword_prefix": null,
63
+ "end_of_word_suffix": null,
64
+ "fuse_unk": false,
65
+ "byte_fallback": false,
66
+ "ignore_merges": false,
67
+ "vocab": {
68
+ "[UNK]": 0,
69
+ "[CLS]": 1,
70
+ "[SEP]": 2,
71
+ "[PAD]": 3,
72
+ "[MASK]": 4,
73
+ "!": 5,
74
+ ",": 6,
75
+ ".": 7,
76
+ "A": 8,
77
+ "E": 9,
78
+ "F": 10,
79
+ "G": 11,
80
+ "H": 12,
81
+ "J": 13,
82
+ "K": 14,
83
+ "L": 15,
84
+ "M": 16,
85
+ "N": 17,
86
+ "O": 18,
87
+ "P": 19,
88
+ "R": 20,
89
+ "S": 21,
90
+ "T": 22,
91
+ "a": 23,
92
+ "d": 24,
93
+ "e": 25,
94
+ "f": 26,
95
+ "g": 27,
96
+ "h": 28,
97
+ "i": 29,
98
+ "j": 30,
99
+ "k": 31,
100
+ "l": 32,
101
+ "m": 33,
102
+ "n": 34,
103
+ "o": 35,
104
+ "p": 36,
105
+ "r": 37,
106
+ "s": 38,
107
+ "t": 39,
108
+ "u": 40,
109
+ "w": 41,
110
+ "x": 42,
111
+ "y": 43,
112
+ "z": 44,
113
+ "na": 45,
114
+ "ra": 46,
115
+ "as": 47,
116
+ "mo": 48,
117
+ "it": 49,
118
+ "ita": 50,
119
+ "ol": 51,
120
+ "ia": 52,
121
+ "ina": 53,
122
+ "fe": 54,
123
+ "op": 55,
124
+ "ki": 56,
125
+ "sa": 57,
126
+ "ola": 58,
127
+ "rita": 59,
128
+ "li": 60,
129
+ "az": 61,
130
+ "er": 62,
131
+ "lola": 63,
132
+ "itana": 64,
133
+ "opa": 65,
134
+ "nu": 66,
135
+ "nus": 67,
136
+ "titana": 68,
137
+ "popa": 69,
138
+ "mona": 70,
139
+ "fenus": 71,
140
+ "liia": 72,
141
+ "ni": 73,
142
+ "asna": 74,
143
+ "momo": 75,
144
+ "aze": 76,
145
+ "iara": 77,
146
+ "fera": 78,
147
+ "nina": 79,
148
+ "momorita": 80,
149
+ "aski": 81,
150
+ "ma": 82,
151
+ "we": 83,
152
+ "kiara": 84,
153
+ "ro": 85,
154
+ "wen": 86,
155
+ "kol": 87,
156
+ "ll": 88,
157
+ "llia": 89,
158
+ "era": 90,
159
+ "ga": 91,
160
+ "kira": 92,
161
+ "sallia": 93,
162
+ "ka": 94,
163
+ "olga": 95,
164
+ "ona": 96,
165
+ "wx": 97,
166
+ "yna": 98,
167
+ "erina": 99,
168
+ "wxa": 100,
169
+ "gwen": 101,
170
+ "ie": 102,
171
+ "rosa": 103,
172
+ "As": 104,
173
+ "hi": 105,
174
+ "hyna": 106,
175
+ "aska": 107,
176
+ "ashi": 108,
177
+ "mwxa": 109,
178
+ "uni": 110,
179
+ "gwenna": 111,
180
+ "asni": 112,
181
+ "erie": 113,
182
+ "kina": 114,
183
+ "eera": 115,
184
+ "opma": 116,
185
+ "kolina": 117,
186
+ "in": 118,
187
+ "jona": 119,
188
+ "lopma": 120,
189
+ "azin": 121,
190
+ "azinn": 122,
191
+ "una": 123,
192
+ "Lola": 124,
193
+ "jeera": 125,
194
+ "ja": 126,
195
+ "kolma": 127,
196
+ "Mo": 128,
197
+ "da": 129,
198
+ "de": 130,
199
+ "lisa": 131,
200
+ "delisa": 132,
201
+ "juna": 133,
202
+ "suni": 134,
203
+ "asnina": 135,
204
+ "Er": 136,
205
+ "se": 137,
206
+ "Fera": 138,
207
+ "lazinn": 139,
208
+ "Suni": 140,
209
+ "erra": 141,
210
+ "momodelisa": 142,
211
+ "Rita": 143,
212
+ "Aski": 144,
213
+ "Momo": 145,
214
+ "Kiara": 146,
215
+ "Gwen": 147,
216
+ "Kina": 148,
217
+ "Nina": 149,
218
+ "Ro": 150,
219
+ "rose": 151,
220
+ "Lazinn": 152,
221
+ "Erie": 153,
222
+ "Jona": 154,
223
+ "Sa": 155,
224
+ "Asni": 156,
225
+ "Juna": 157,
226
+ "Mwxa": 158,
227
+ "lli": 159,
228
+ "Aska": 160,
229
+ "Mora": 161,
230
+ "Rose": 162,
231
+ "Kol": 163,
232
+ "Li": 164,
233
+ "Mona": 165,
234
+ "Asna": 166,
235
+ "Erra": 167,
236
+ "Liia": 168,
237
+ "Fe": 169,
238
+ "Jeera": 170,
239
+ "Ki": 171,
240
+ "Popa": 172,
241
+ "mora": 173,
242
+ "Momodelisa": 174,
243
+ "Gwenna": 175,
244
+ "Rosa": 176,
245
+ "Sallia": 177,
246
+ "Salli": 178,
247
+ "Kolma": 179,
248
+ "Fenus": 180,
249
+ "Kira": 181,
250
+ "Az": 182,
251
+ "Hyna": 183,
252
+ "Titana": 184,
253
+ "Asnina": 185,
254
+ "Aze": 186,
255
+ "Ol": 187,
256
+ "le": 188,
257
+ "salli": 189,
258
+ "Ashi": 190,
259
+ "Erina": 191,
260
+ "Olga": 192,
261
+ "..": 193,
262
+ "Ja": 194,
263
+ "Je": 195,
264
+ "Lopma": 196,
265
+ "ar": 197,
266
+ "gwe": 198,
267
+ "par": 199,
268
+ "Kolina": 200,
269
+ "gwena": 201,
270
+ "parle": 202
271
+ },
272
+ "merges": [
273
+ [
274
+ "n",
275
+ "a"
276
+ ],
277
+ [
278
+ "r",
279
+ "a"
280
+ ],
281
+ [
282
+ "a",
283
+ "s"
284
+ ],
285
+ [
286
+ "m",
287
+ "o"
288
+ ],
289
+ [
290
+ "i",
291
+ "t"
292
+ ],
293
+ [
294
+ "it",
295
+ "a"
296
+ ],
297
+ [
298
+ "o",
299
+ "l"
300
+ ],
301
+ [
302
+ "i",
303
+ "a"
304
+ ],
305
+ [
306
+ "i",
307
+ "na"
308
+ ],
309
+ [
310
+ "f",
311
+ "e"
312
+ ],
313
+ [
314
+ "o",
315
+ "p"
316
+ ],
317
+ [
318
+ "k",
319
+ "i"
320
+ ],
321
+ [
322
+ "s",
323
+ "a"
324
+ ],
325
+ [
326
+ "ol",
327
+ "a"
328
+ ],
329
+ [
330
+ "r",
331
+ "ita"
332
+ ],
333
+ [
334
+ "l",
335
+ "i"
336
+ ],
337
+ [
338
+ "a",
339
+ "z"
340
+ ],
341
+ [
342
+ "e",
343
+ "r"
344
+ ],
345
+ [
346
+ "l",
347
+ "ola"
348
+ ],
349
+ [
350
+ "ita",
351
+ "na"
352
+ ],
353
+ [
354
+ "op",
355
+ "a"
356
+ ],
357
+ [
358
+ "n",
359
+ "u"
360
+ ],
361
+ [
362
+ "nu",
363
+ "s"
364
+ ],
365
+ [
366
+ "t",
367
+ "itana"
368
+ ],
369
+ [
370
+ "p",
371
+ "opa"
372
+ ],
373
+ [
374
+ "mo",
375
+ "na"
376
+ ],
377
+ [
378
+ "fe",
379
+ "nus"
380
+ ],
381
+ [
382
+ "li",
383
+ "ia"
384
+ ],
385
+ [
386
+ "n",
387
+ "i"
388
+ ],
389
+ [
390
+ "as",
391
+ "na"
392
+ ],
393
+ [
394
+ "mo",
395
+ "mo"
396
+ ],
397
+ [
398
+ "az",
399
+ "e"
400
+ ],
401
+ [
402
+ "ia",
403
+ "ra"
404
+ ],
405
+ [
406
+ "fe",
407
+ "ra"
408
+ ],
409
+ [
410
+ "n",
411
+ "ina"
412
+ ],
413
+ [
414
+ "momo",
415
+ "rita"
416
+ ],
417
+ [
418
+ "as",
419
+ "ki"
420
+ ],
421
+ [
422
+ "m",
423
+ "a"
424
+ ],
425
+ [
426
+ "w",
427
+ "e"
428
+ ],
429
+ [
430
+ "k",
431
+ "iara"
432
+ ],
433
+ [
434
+ "r",
435
+ "o"
436
+ ],
437
+ [
438
+ "we",
439
+ "n"
440
+ ],
441
+ [
442
+ "k",
443
+ "ol"
444
+ ],
445
+ [
446
+ "l",
447
+ "l"
448
+ ],
449
+ [
450
+ "ll",
451
+ "ia"
452
+ ],
453
+ [
454
+ "e",
455
+ "ra"
456
+ ],
457
+ [
458
+ "g",
459
+ "a"
460
+ ],
461
+ [
462
+ "ki",
463
+ "ra"
464
+ ],
465
+ [
466
+ "sa",
467
+ "llia"
468
+ ],
469
+ [
470
+ "k",
471
+ "a"
472
+ ],
473
+ [
474
+ "ol",
475
+ "ga"
476
+ ],
477
+ [
478
+ "o",
479
+ "na"
480
+ ],
481
+ [
482
+ "w",
483
+ "x"
484
+ ],
485
+ [
486
+ "y",
487
+ "na"
488
+ ],
489
+ [
490
+ "er",
491
+ "ina"
492
+ ],
493
+ [
494
+ "wx",
495
+ "a"
496
+ ],
497
+ [
498
+ "g",
499
+ "wen"
500
+ ],
501
+ [
502
+ "i",
503
+ "e"
504
+ ],
505
+ [
506
+ "ro",
507
+ "sa"
508
+ ],
509
+ [
510
+ "A",
511
+ "s"
512
+ ],
513
+ [
514
+ "h",
515
+ "i"
516
+ ],
517
+ [
518
+ "h",
519
+ "yna"
520
+ ],
521
+ [
522
+ "as",
523
+ "ka"
524
+ ],
525
+ [
526
+ "as",
527
+ "hi"
528
+ ],
529
+ [
530
+ "m",
531
+ "wxa"
532
+ ],
533
+ [
534
+ "u",
535
+ "ni"
536
+ ],
537
+ [
538
+ "gwen",
539
+ "na"
540
+ ],
541
+ [
542
+ "as",
543
+ "ni"
544
+ ],
545
+ [
546
+ "er",
547
+ "ie"
548
+ ],
549
+ [
550
+ "k",
551
+ "ina"
552
+ ],
553
+ [
554
+ "e",
555
+ "era"
556
+ ],
557
+ [
558
+ "op",
559
+ "ma"
560
+ ],
561
+ [
562
+ "kol",
563
+ "ina"
564
+ ],
565
+ [
566
+ "i",
567
+ "n"
568
+ ],
569
+ [
570
+ "j",
571
+ "ona"
572
+ ],
573
+ [
574
+ "l",
575
+ "opma"
576
+ ],
577
+ [
578
+ "az",
579
+ "in"
580
+ ],
581
+ [
582
+ "azin",
583
+ "n"
584
+ ],
585
+ [
586
+ "u",
587
+ "na"
588
+ ],
589
+ [
590
+ "L",
591
+ "ola"
592
+ ],
593
+ [
594
+ "j",
595
+ "eera"
596
+ ],
597
+ [
598
+ "j",
599
+ "a"
600
+ ],
601
+ [
602
+ "kol",
603
+ "ma"
604
+ ],
605
+ [
606
+ "M",
607
+ "o"
608
+ ],
609
+ [
610
+ "d",
611
+ "a"
612
+ ],
613
+ [
614
+ "d",
615
+ "e"
616
+ ],
617
+ [
618
+ "li",
619
+ "sa"
620
+ ],
621
+ [
622
+ "de",
623
+ "lisa"
624
+ ],
625
+ [
626
+ "j",
627
+ "una"
628
+ ],
629
+ [
630
+ "s",
631
+ "uni"
632
+ ],
633
+ [
634
+ "as",
635
+ "nina"
636
+ ],
637
+ [
638
+ "E",
639
+ "r"
640
+ ],
641
+ [
642
+ "s",
643
+ "e"
644
+ ],
645
+ [
646
+ "F",
647
+ "era"
648
+ ],
649
+ [
650
+ "l",
651
+ "azinn"
652
+ ],
653
+ [
654
+ "S",
655
+ "uni"
656
+ ],
657
+ [
658
+ "er",
659
+ "ra"
660
+ ],
661
+ [
662
+ "momo",
663
+ "delisa"
664
+ ],
665
+ [
666
+ "R",
667
+ "ita"
668
+ ],
669
+ [
670
+ "As",
671
+ "ki"
672
+ ],
673
+ [
674
+ "Mo",
675
+ "mo"
676
+ ],
677
+ [
678
+ "K",
679
+ "iara"
680
+ ],
681
+ [
682
+ "G",
683
+ "wen"
684
+ ],
685
+ [
686
+ "K",
687
+ "ina"
688
+ ],
689
+ [
690
+ "N",
691
+ "ina"
692
+ ],
693
+ [
694
+ "R",
695
+ "o"
696
+ ],
697
+ [
698
+ "ro",
699
+ "se"
700
+ ],
701
+ [
702
+ "L",
703
+ "azinn"
704
+ ],
705
+ [
706
+ "Er",
707
+ "ie"
708
+ ],
709
+ [
710
+ "J",
711
+ "ona"
712
+ ],
713
+ [
714
+ "S",
715
+ "a"
716
+ ],
717
+ [
718
+ "As",
719
+ "ni"
720
+ ],
721
+ [
722
+ "J",
723
+ "una"
724
+ ],
725
+ [
726
+ "M",
727
+ "wxa"
728
+ ],
729
+ [
730
+ "l",
731
+ "li"
732
+ ],
733
+ [
734
+ "As",
735
+ "ka"
736
+ ],
737
+ [
738
+ "Mo",
739
+ "ra"
740
+ ],
741
+ [
742
+ "Ro",
743
+ "se"
744
+ ],
745
+ [
746
+ "K",
747
+ "ol"
748
+ ],
749
+ [
750
+ "L",
751
+ "i"
752
+ ],
753
+ [
754
+ "M",
755
+ "ona"
756
+ ],
757
+ [
758
+ "As",
759
+ "na"
760
+ ],
761
+ [
762
+ "Er",
763
+ "ra"
764
+ ],
765
+ [
766
+ "Li",
767
+ "ia"
768
+ ],
769
+ [
770
+ "F",
771
+ "e"
772
+ ],
773
+ [
774
+ "J",
775
+ "eera"
776
+ ],
777
+ [
778
+ "K",
779
+ "i"
780
+ ],
781
+ [
782
+ "P",
783
+ "opa"
784
+ ],
785
+ [
786
+ "mo",
787
+ "ra"
788
+ ],
789
+ [
790
+ "Momo",
791
+ "delisa"
792
+ ],
793
+ [
794
+ "Gwen",
795
+ "na"
796
+ ],
797
+ [
798
+ "Ro",
799
+ "sa"
800
+ ],
801
+ [
802
+ "Sa",
803
+ "llia"
804
+ ],
805
+ [
806
+ "Sa",
807
+ "lli"
808
+ ],
809
+ [
810
+ "Kol",
811
+ "ma"
812
+ ],
813
+ [
814
+ "Fe",
815
+ "nus"
816
+ ],
817
+ [
818
+ "Ki",
819
+ "ra"
820
+ ],
821
+ [
822
+ "A",
823
+ "z"
824
+ ],
825
+ [
826
+ "H",
827
+ "yna"
828
+ ],
829
+ [
830
+ "T",
831
+ "itana"
832
+ ],
833
+ [
834
+ "As",
835
+ "nina"
836
+ ],
837
+ [
838
+ "Az",
839
+ "e"
840
+ ],
841
+ [
842
+ "O",
843
+ "l"
844
+ ],
845
+ [
846
+ "l",
847
+ "e"
848
+ ],
849
+ [
850
+ "sa",
851
+ "lli"
852
+ ],
853
+ [
854
+ "As",
855
+ "hi"
856
+ ],
857
+ [
858
+ "Er",
859
+ "ina"
860
+ ],
861
+ [
862
+ "Ol",
863
+ "ga"
864
+ ],
865
+ [
866
+ ".",
867
+ "."
868
+ ],
869
+ [
870
+ "J",
871
+ "a"
872
+ ],
873
+ [
874
+ "J",
875
+ "e"
876
+ ],
877
+ [
878
+ "L",
879
+ "opma"
880
+ ],
881
+ [
882
+ "a",
883
+ "r"
884
+ ],
885
+ [
886
+ "g",
887
+ "we"
888
+ ],
889
+ [
890
+ "p",
891
+ "ar"
892
+ ],
893
+ [
894
+ "Kol",
895
+ "ina"
896
+ ],
897
+ [
898
+ "gwe",
899
+ "na"
900
+ ],
901
+ [
902
+ "par",
903
+ "le"
904
+ ]
905
+ ]
906
+ }
907
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "extra_special_tokens": {},
46
+ "model_max_length": 1000000000000000019884624838656,
47
+ "pad_token": "[PAD]",
48
+ "tokenizer_class": "PreTrainedTokenizerFast"
49
+ }