Conlanger-LLM-CLEM commited on
Commit
796c3aa
·
verified ·
1 Parent(s): 160b855

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +932 -0
  2. tokenizer_config.json +9 -0
tokenizer.json ADDED
@@ -0,0 +1,932 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[CLS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Whitespace"
55
+ },
56
+ "post_processor": {
57
+ "type": "TemplateProcessing",
58
+ "single": [
59
+ {
60
+ "Sequence": {
61
+ "id": "A",
62
+ "type_id": 0
63
+ }
64
+ }
65
+ ],
66
+ "pair": [
67
+ {
68
+ "Sequence": {
69
+ "id": "A",
70
+ "type_id": 0
71
+ }
72
+ },
73
+ {
74
+ "Sequence": {
75
+ "id": "B",
76
+ "type_id": 1
77
+ }
78
+ }
79
+ ],
80
+ "special_tokens": {}
81
+ },
82
+ "decoder": null,
83
+ "model": {
84
+ "type": "BPE",
85
+ "dropout": null,
86
+ "unk_token": "[UNK]",
87
+ "continuing_subword_prefix": null,
88
+ "end_of_word_suffix": null,
89
+ "fuse_unk": false,
90
+ "byte_fallback": false,
91
+ "ignore_merges": false,
92
+ "vocab": {
93
+ "[UNK]": 0,
94
+ "[CLS]": 1,
95
+ "[SEP]": 2,
96
+ "[PAD]": 3,
97
+ "[MASK]": 4,
98
+ "!": 5,
99
+ ",": 6,
100
+ ".": 7,
101
+ "A": 8,
102
+ "E": 9,
103
+ "F": 10,
104
+ "G": 11,
105
+ "H": 12,
106
+ "J": 13,
107
+ "K": 14,
108
+ "L": 15,
109
+ "M": 16,
110
+ "N": 17,
111
+ "O": 18,
112
+ "P": 19,
113
+ "R": 20,
114
+ "S": 21,
115
+ "T": 22,
116
+ "a": 23,
117
+ "d": 24,
118
+ "e": 25,
119
+ "f": 26,
120
+ "g": 27,
121
+ "h": 28,
122
+ "i": 29,
123
+ "j": 30,
124
+ "k": 31,
125
+ "l": 32,
126
+ "m": 33,
127
+ "n": 34,
128
+ "o": 35,
129
+ "p": 36,
130
+ "r": 37,
131
+ "s": 38,
132
+ "t": 39,
133
+ "u": 40,
134
+ "w": 41,
135
+ "x": 42,
136
+ "y": 43,
137
+ "z": 44,
138
+ "na": 45,
139
+ "ra": 46,
140
+ "as": 47,
141
+ "mo": 48,
142
+ "it": 49,
143
+ "ita": 50,
144
+ "ol": 51,
145
+ "ia": 52,
146
+ "ina": 53,
147
+ "fe": 54,
148
+ "op": 55,
149
+ "ki": 56,
150
+ "sa": 57,
151
+ "ola": 58,
152
+ "rita": 59,
153
+ "li": 60,
154
+ "az": 61,
155
+ "er": 62,
156
+ "lola": 63,
157
+ "itana": 64,
158
+ "opa": 65,
159
+ "nu": 66,
160
+ "nus": 67,
161
+ "titana": 68,
162
+ "popa": 69,
163
+ "mona": 70,
164
+ "fenus": 71,
165
+ "liia": 72,
166
+ "ni": 73,
167
+ "asna": 74,
168
+ "momo": 75,
169
+ "aze": 76,
170
+ "iara": 77,
171
+ "fera": 78,
172
+ "nina": 79,
173
+ "momorita": 80,
174
+ "aski": 81,
175
+ "ma": 82,
176
+ "we": 83,
177
+ "kiara": 84,
178
+ "ro": 85,
179
+ "wen": 86,
180
+ "kol": 87,
181
+ "ll": 88,
182
+ "llia": 89,
183
+ "era": 90,
184
+ "ga": 91,
185
+ "kira": 92,
186
+ "sallia": 93,
187
+ "ka": 94,
188
+ "olga": 95,
189
+ "ona": 96,
190
+ "wx": 97,
191
+ "yna": 98,
192
+ "erina": 99,
193
+ "wxa": 100,
194
+ "gwen": 101,
195
+ "ie": 102,
196
+ "rosa": 103,
197
+ "As": 104,
198
+ "hi": 105,
199
+ "hyna": 106,
200
+ "aska": 107,
201
+ "ashi": 108,
202
+ "mwxa": 109,
203
+ "uni": 110,
204
+ "gwenna": 111,
205
+ "asni": 112,
206
+ "erie": 113,
207
+ "kina": 114,
208
+ "eera": 115,
209
+ "opma": 116,
210
+ "kolina": 117,
211
+ "in": 118,
212
+ "jona": 119,
213
+ "lopma": 120,
214
+ "azin": 121,
215
+ "azinn": 122,
216
+ "una": 123,
217
+ "Lola": 124,
218
+ "jeera": 125,
219
+ "ja": 126,
220
+ "kolma": 127,
221
+ "Mo": 128,
222
+ "da": 129,
223
+ "de": 130,
224
+ "lisa": 131,
225
+ "delisa": 132,
226
+ "juna": 133,
227
+ "suni": 134,
228
+ "asnina": 135,
229
+ "Er": 136,
230
+ "se": 137,
231
+ "Fera": 138,
232
+ "lazinn": 139,
233
+ "Suni": 140,
234
+ "erra": 141,
235
+ "momodelisa": 142,
236
+ "Rita": 143,
237
+ "Aski": 144,
238
+ "Momo": 145,
239
+ "Kiara": 146,
240
+ "Gwen": 147,
241
+ "Kina": 148,
242
+ "Nina": 149,
243
+ "Ro": 150,
244
+ "rose": 151,
245
+ "Lazinn": 152,
246
+ "Erie": 153,
247
+ "Jona": 154,
248
+ "Sa": 155,
249
+ "Asni": 156,
250
+ "Juna": 157,
251
+ "Mwxa": 158,
252
+ "lli": 159,
253
+ "Aska": 160,
254
+ "Mora": 161,
255
+ "Rose": 162,
256
+ "Kol": 163,
257
+ "Li": 164,
258
+ "Mona": 165,
259
+ "Asna": 166,
260
+ "Erra": 167,
261
+ "Liia": 168,
262
+ "Fe": 169,
263
+ "Jeera": 170,
264
+ "Ki": 171,
265
+ "Popa": 172,
266
+ "mora": 173,
267
+ "Momodelisa": 174,
268
+ "Gwenna": 175,
269
+ "Rosa": 176,
270
+ "Sallia": 177,
271
+ "Salli": 178,
272
+ "Kolma": 179,
273
+ "Fenus": 180,
274
+ "Kira": 181,
275
+ "Az": 182,
276
+ "Hyna": 183,
277
+ "Titana": 184,
278
+ "Asnina": 185,
279
+ "Aze": 186,
280
+ "Ol": 187,
281
+ "le": 188,
282
+ "salli": 189,
283
+ "Ashi": 190,
284
+ "Erina": 191,
285
+ "Olga": 192,
286
+ "..": 193,
287
+ "Ja": 194,
288
+ "Je": 195,
289
+ "Lopma": 196,
290
+ "ar": 197,
291
+ "gwe": 198,
292
+ "par": 199,
293
+ "Kolina": 200,
294
+ "gwena": 201,
295
+ "parle": 202
296
+ },
297
+ "merges": [
298
+ [
299
+ "n",
300
+ "a"
301
+ ],
302
+ [
303
+ "r",
304
+ "a"
305
+ ],
306
+ [
307
+ "a",
308
+ "s"
309
+ ],
310
+ [
311
+ "m",
312
+ "o"
313
+ ],
314
+ [
315
+ "i",
316
+ "t"
317
+ ],
318
+ [
319
+ "it",
320
+ "a"
321
+ ],
322
+ [
323
+ "o",
324
+ "l"
325
+ ],
326
+ [
327
+ "i",
328
+ "a"
329
+ ],
330
+ [
331
+ "i",
332
+ "na"
333
+ ],
334
+ [
335
+ "f",
336
+ "e"
337
+ ],
338
+ [
339
+ "o",
340
+ "p"
341
+ ],
342
+ [
343
+ "k",
344
+ "i"
345
+ ],
346
+ [
347
+ "s",
348
+ "a"
349
+ ],
350
+ [
351
+ "ol",
352
+ "a"
353
+ ],
354
+ [
355
+ "r",
356
+ "ita"
357
+ ],
358
+ [
359
+ "l",
360
+ "i"
361
+ ],
362
+ [
363
+ "a",
364
+ "z"
365
+ ],
366
+ [
367
+ "e",
368
+ "r"
369
+ ],
370
+ [
371
+ "l",
372
+ "ola"
373
+ ],
374
+ [
375
+ "ita",
376
+ "na"
377
+ ],
378
+ [
379
+ "op",
380
+ "a"
381
+ ],
382
+ [
383
+ "n",
384
+ "u"
385
+ ],
386
+ [
387
+ "nu",
388
+ "s"
389
+ ],
390
+ [
391
+ "t",
392
+ "itana"
393
+ ],
394
+ [
395
+ "p",
396
+ "opa"
397
+ ],
398
+ [
399
+ "mo",
400
+ "na"
401
+ ],
402
+ [
403
+ "fe",
404
+ "nus"
405
+ ],
406
+ [
407
+ "li",
408
+ "ia"
409
+ ],
410
+ [
411
+ "n",
412
+ "i"
413
+ ],
414
+ [
415
+ "as",
416
+ "na"
417
+ ],
418
+ [
419
+ "mo",
420
+ "mo"
421
+ ],
422
+ [
423
+ "az",
424
+ "e"
425
+ ],
426
+ [
427
+ "ia",
428
+ "ra"
429
+ ],
430
+ [
431
+ "fe",
432
+ "ra"
433
+ ],
434
+ [
435
+ "n",
436
+ "ina"
437
+ ],
438
+ [
439
+ "momo",
440
+ "rita"
441
+ ],
442
+ [
443
+ "as",
444
+ "ki"
445
+ ],
446
+ [
447
+ "m",
448
+ "a"
449
+ ],
450
+ [
451
+ "w",
452
+ "e"
453
+ ],
454
+ [
455
+ "k",
456
+ "iara"
457
+ ],
458
+ [
459
+ "r",
460
+ "o"
461
+ ],
462
+ [
463
+ "we",
464
+ "n"
465
+ ],
466
+ [
467
+ "k",
468
+ "ol"
469
+ ],
470
+ [
471
+ "l",
472
+ "l"
473
+ ],
474
+ [
475
+ "ll",
476
+ "ia"
477
+ ],
478
+ [
479
+ "e",
480
+ "ra"
481
+ ],
482
+ [
483
+ "g",
484
+ "a"
485
+ ],
486
+ [
487
+ "ki",
488
+ "ra"
489
+ ],
490
+ [
491
+ "sa",
492
+ "llia"
493
+ ],
494
+ [
495
+ "k",
496
+ "a"
497
+ ],
498
+ [
499
+ "ol",
500
+ "ga"
501
+ ],
502
+ [
503
+ "o",
504
+ "na"
505
+ ],
506
+ [
507
+ "w",
508
+ "x"
509
+ ],
510
+ [
511
+ "y",
512
+ "na"
513
+ ],
514
+ [
515
+ "er",
516
+ "ina"
517
+ ],
518
+ [
519
+ "wx",
520
+ "a"
521
+ ],
522
+ [
523
+ "g",
524
+ "wen"
525
+ ],
526
+ [
527
+ "i",
528
+ "e"
529
+ ],
530
+ [
531
+ "ro",
532
+ "sa"
533
+ ],
534
+ [
535
+ "A",
536
+ "s"
537
+ ],
538
+ [
539
+ "h",
540
+ "i"
541
+ ],
542
+ [
543
+ "h",
544
+ "yna"
545
+ ],
546
+ [
547
+ "as",
548
+ "ka"
549
+ ],
550
+ [
551
+ "as",
552
+ "hi"
553
+ ],
554
+ [
555
+ "m",
556
+ "wxa"
557
+ ],
558
+ [
559
+ "u",
560
+ "ni"
561
+ ],
562
+ [
563
+ "gwen",
564
+ "na"
565
+ ],
566
+ [
567
+ "as",
568
+ "ni"
569
+ ],
570
+ [
571
+ "er",
572
+ "ie"
573
+ ],
574
+ [
575
+ "k",
576
+ "ina"
577
+ ],
578
+ [
579
+ "e",
580
+ "era"
581
+ ],
582
+ [
583
+ "op",
584
+ "ma"
585
+ ],
586
+ [
587
+ "kol",
588
+ "ina"
589
+ ],
590
+ [
591
+ "i",
592
+ "n"
593
+ ],
594
+ [
595
+ "j",
596
+ "ona"
597
+ ],
598
+ [
599
+ "l",
600
+ "opma"
601
+ ],
602
+ [
603
+ "az",
604
+ "in"
605
+ ],
606
+ [
607
+ "azin",
608
+ "n"
609
+ ],
610
+ [
611
+ "u",
612
+ "na"
613
+ ],
614
+ [
615
+ "L",
616
+ "ola"
617
+ ],
618
+ [
619
+ "j",
620
+ "eera"
621
+ ],
622
+ [
623
+ "j",
624
+ "a"
625
+ ],
626
+ [
627
+ "kol",
628
+ "ma"
629
+ ],
630
+ [
631
+ "M",
632
+ "o"
633
+ ],
634
+ [
635
+ "d",
636
+ "a"
637
+ ],
638
+ [
639
+ "d",
640
+ "e"
641
+ ],
642
+ [
643
+ "li",
644
+ "sa"
645
+ ],
646
+ [
647
+ "de",
648
+ "lisa"
649
+ ],
650
+ [
651
+ "j",
652
+ "una"
653
+ ],
654
+ [
655
+ "s",
656
+ "uni"
657
+ ],
658
+ [
659
+ "as",
660
+ "nina"
661
+ ],
662
+ [
663
+ "E",
664
+ "r"
665
+ ],
666
+ [
667
+ "s",
668
+ "e"
669
+ ],
670
+ [
671
+ "F",
672
+ "era"
673
+ ],
674
+ [
675
+ "l",
676
+ "azinn"
677
+ ],
678
+ [
679
+ "S",
680
+ "uni"
681
+ ],
682
+ [
683
+ "er",
684
+ "ra"
685
+ ],
686
+ [
687
+ "momo",
688
+ "delisa"
689
+ ],
690
+ [
691
+ "R",
692
+ "ita"
693
+ ],
694
+ [
695
+ "As",
696
+ "ki"
697
+ ],
698
+ [
699
+ "Mo",
700
+ "mo"
701
+ ],
702
+ [
703
+ "K",
704
+ "iara"
705
+ ],
706
+ [
707
+ "G",
708
+ "wen"
709
+ ],
710
+ [
711
+ "K",
712
+ "ina"
713
+ ],
714
+ [
715
+ "N",
716
+ "ina"
717
+ ],
718
+ [
719
+ "R",
720
+ "o"
721
+ ],
722
+ [
723
+ "ro",
724
+ "se"
725
+ ],
726
+ [
727
+ "L",
728
+ "azinn"
729
+ ],
730
+ [
731
+ "Er",
732
+ "ie"
733
+ ],
734
+ [
735
+ "J",
736
+ "ona"
737
+ ],
738
+ [
739
+ "S",
740
+ "a"
741
+ ],
742
+ [
743
+ "As",
744
+ "ni"
745
+ ],
746
+ [
747
+ "J",
748
+ "una"
749
+ ],
750
+ [
751
+ "M",
752
+ "wxa"
753
+ ],
754
+ [
755
+ "l",
756
+ "li"
757
+ ],
758
+ [
759
+ "As",
760
+ "ka"
761
+ ],
762
+ [
763
+ "Mo",
764
+ "ra"
765
+ ],
766
+ [
767
+ "Ro",
768
+ "se"
769
+ ],
770
+ [
771
+ "K",
772
+ "ol"
773
+ ],
774
+ [
775
+ "L",
776
+ "i"
777
+ ],
778
+ [
779
+ "M",
780
+ "ona"
781
+ ],
782
+ [
783
+ "As",
784
+ "na"
785
+ ],
786
+ [
787
+ "Er",
788
+ "ra"
789
+ ],
790
+ [
791
+ "Li",
792
+ "ia"
793
+ ],
794
+ [
795
+ "F",
796
+ "e"
797
+ ],
798
+ [
799
+ "J",
800
+ "eera"
801
+ ],
802
+ [
803
+ "K",
804
+ "i"
805
+ ],
806
+ [
807
+ "P",
808
+ "opa"
809
+ ],
810
+ [
811
+ "mo",
812
+ "ra"
813
+ ],
814
+ [
815
+ "Momo",
816
+ "delisa"
817
+ ],
818
+ [
819
+ "Gwen",
820
+ "na"
821
+ ],
822
+ [
823
+ "Ro",
824
+ "sa"
825
+ ],
826
+ [
827
+ "Sa",
828
+ "llia"
829
+ ],
830
+ [
831
+ "Sa",
832
+ "lli"
833
+ ],
834
+ [
835
+ "Kol",
836
+ "ma"
837
+ ],
838
+ [
839
+ "Fe",
840
+ "nus"
841
+ ],
842
+ [
843
+ "Ki",
844
+ "ra"
845
+ ],
846
+ [
847
+ "A",
848
+ "z"
849
+ ],
850
+ [
851
+ "H",
852
+ "yna"
853
+ ],
854
+ [
855
+ "T",
856
+ "itana"
857
+ ],
858
+ [
859
+ "As",
860
+ "nina"
861
+ ],
862
+ [
863
+ "Az",
864
+ "e"
865
+ ],
866
+ [
867
+ "O",
868
+ "l"
869
+ ],
870
+ [
871
+ "l",
872
+ "e"
873
+ ],
874
+ [
875
+ "sa",
876
+ "lli"
877
+ ],
878
+ [
879
+ "As",
880
+ "hi"
881
+ ],
882
+ [
883
+ "Er",
884
+ "ina"
885
+ ],
886
+ [
887
+ "Ol",
888
+ "ga"
889
+ ],
890
+ [
891
+ ".",
892
+ "."
893
+ ],
894
+ [
895
+ "J",
896
+ "a"
897
+ ],
898
+ [
899
+ "J",
900
+ "e"
901
+ ],
902
+ [
903
+ "L",
904
+ "opma"
905
+ ],
906
+ [
907
+ "a",
908
+ "r"
909
+ ],
910
+ [
911
+ "g",
912
+ "we"
913
+ ],
914
+ [
915
+ "p",
916
+ "ar"
917
+ ],
918
+ [
919
+ "Kol",
920
+ "ina"
921
+ ],
922
+ [
923
+ "gwe",
924
+ "na"
925
+ ],
926
+ [
927
+ "par",
928
+ "le"
929
+ ]
930
+ ]
931
+ }
932
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": false,
4
+ "is_local": true,
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "model_specific_special_tokens": {},
7
+ "pad_token": "[PAD]",
8
+ "tokenizer_class": "TokenizersBackend"
9
+ }