suchirsalhan committed on
Commit
8edfef9
·
verified ·
1 Parent(s): ba0017c

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. tokenizer.json +41 -1391
  2. tokenizer_config.json +1 -108
tokenizer.json CHANGED
@@ -5,907 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "content": "<pad>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "</s>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 2,
26
- "content": "<unk>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 4,
35
- "content": "<extra_id_99>",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 5,
44
- "content": "<extra_id_98>",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 6,
53
- "content": "<extra_id_97>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 7,
62
- "content": "<extra_id_96>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 8,
71
- "content": "<extra_id_95>",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
- },
78
- {
79
- "id": 9,
80
- "content": "<extra_id_94>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- {
88
- "id": 10,
89
- "content": "<extra_id_93>",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
- },
96
- {
97
- "id": 11,
98
- "content": "<extra_id_92>",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
- },
105
- {
106
- "id": 12,
107
- "content": "<extra_id_91>",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
- },
114
- {
115
- "id": 13,
116
- "content": "<extra_id_90>",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
- },
123
- {
124
- "id": 14,
125
- "content": "<extra_id_89>",
126
- "single_word": false,
127
- "lstrip": false,
128
- "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
- },
132
- {
133
- "id": 15,
134
- "content": "<extra_id_88>",
135
- "single_word": false,
136
- "lstrip": false,
137
- "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
- },
141
- {
142
- "id": 16,
143
- "content": "<extra_id_87>",
144
- "single_word": false,
145
- "lstrip": false,
146
- "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
- },
150
- {
151
- "id": 17,
152
- "content": "<extra_id_86>",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
- },
159
- {
160
- "id": 18,
161
- "content": "<extra_id_85>",
162
- "single_word": false,
163
- "lstrip": false,
164
- "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
- },
168
- {
169
- "id": 19,
170
- "content": "<extra_id_84>",
171
- "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
- },
177
- {
178
- "id": 20,
179
- "content": "<extra_id_83>",
180
- "single_word": false,
181
- "lstrip": false,
182
- "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
- },
186
- {
187
- "id": 21,
188
- "content": "<extra_id_82>",
189
- "single_word": false,
190
- "lstrip": false,
191
- "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
- },
195
- {
196
- "id": 22,
197
- "content": "<extra_id_81>",
198
- "single_word": false,
199
- "lstrip": false,
200
- "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
- },
204
- {
205
- "id": 23,
206
- "content": "<extra_id_80>",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
- },
213
- {
214
- "id": 24,
215
- "content": "<extra_id_79>",
216
- "single_word": false,
217
- "lstrip": false,
218
- "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
- },
222
- {
223
- "id": 25,
224
- "content": "<extra_id_78>",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
- },
231
- {
232
- "id": 26,
233
- "content": "<extra_id_77>",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
- },
240
- {
241
- "id": 27,
242
- "content": "<extra_id_76>",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
- },
249
- {
250
- "id": 28,
251
- "content": "<extra_id_75>",
252
- "single_word": false,
253
- "lstrip": false,
254
- "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
- },
258
- {
259
- "id": 29,
260
- "content": "<extra_id_74>",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
- },
267
- {
268
- "id": 30,
269
- "content": "<extra_id_73>",
270
- "single_word": false,
271
- "lstrip": false,
272
- "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
- },
276
- {
277
- "id": 31,
278
- "content": "<extra_id_72>",
279
- "single_word": false,
280
- "lstrip": false,
281
- "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
- },
285
- {
286
- "id": 32,
287
- "content": "<extra_id_71>",
288
- "single_word": false,
289
- "lstrip": false,
290
- "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
- },
294
- {
295
- "id": 33,
296
- "content": "<extra_id_70>",
297
- "single_word": false,
298
- "lstrip": false,
299
- "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
- },
303
- {
304
- "id": 34,
305
- "content": "<extra_id_69>",
306
- "single_word": false,
307
- "lstrip": false,
308
- "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
- },
312
- {
313
- "id": 35,
314
- "content": "<extra_id_68>",
315
- "single_word": false,
316
- "lstrip": false,
317
- "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
- },
321
- {
322
- "id": 36,
323
- "content": "<extra_id_67>",
324
- "single_word": false,
325
- "lstrip": false,
326
- "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
- },
330
- {
331
- "id": 37,
332
- "content": "<extra_id_66>",
333
- "single_word": false,
334
- "lstrip": false,
335
- "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
- },
339
- {
340
- "id": 38,
341
- "content": "<extra_id_65>",
342
- "single_word": false,
343
- "lstrip": false,
344
- "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
- },
348
- {
349
- "id": 39,
350
- "content": "<extra_id_64>",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
- },
357
- {
358
- "id": 40,
359
- "content": "<extra_id_63>",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
- },
366
- {
367
- "id": 41,
368
- "content": "<extra_id_62>",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
- },
375
- {
376
- "id": 42,
377
- "content": "<extra_id_61>",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
- },
384
- {
385
- "id": 43,
386
- "content": "<extra_id_60>",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
- },
393
- {
394
- "id": 44,
395
- "content": "<extra_id_59>",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
- },
402
- {
403
- "id": 45,
404
- "content": "<extra_id_58>",
405
- "single_word": false,
406
- "lstrip": false,
407
- "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
- },
411
- {
412
- "id": 46,
413
- "content": "<extra_id_57>",
414
- "single_word": false,
415
- "lstrip": false,
416
- "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
- },
420
- {
421
- "id": 47,
422
- "content": "<extra_id_56>",
423
- "single_word": false,
424
- "lstrip": false,
425
- "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
- },
429
- {
430
- "id": 48,
431
- "content": "<extra_id_55>",
432
- "single_word": false,
433
- "lstrip": false,
434
- "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
- },
438
- {
439
- "id": 49,
440
- "content": "<extra_id_54>",
441
- "single_word": false,
442
- "lstrip": false,
443
- "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
- },
447
- {
448
- "id": 50,
449
- "content": "<extra_id_53>",
450
- "single_word": false,
451
- "lstrip": false,
452
- "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
- },
456
- {
457
- "id": 51,
458
- "content": "<extra_id_52>",
459
- "single_word": false,
460
- "lstrip": false,
461
- "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
- },
465
- {
466
- "id": 52,
467
- "content": "<extra_id_51>",
468
- "single_word": false,
469
- "lstrip": false,
470
- "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
- },
474
- {
475
- "id": 53,
476
- "content": "<extra_id_50>",
477
- "single_word": false,
478
- "lstrip": false,
479
- "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
- },
483
- {
484
- "id": 54,
485
- "content": "<extra_id_49>",
486
- "single_word": false,
487
- "lstrip": false,
488
- "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
- },
492
- {
493
- "id": 55,
494
- "content": "<extra_id_48>",
495
- "single_word": false,
496
- "lstrip": false,
497
- "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
- },
501
- {
502
- "id": 56,
503
- "content": "<extra_id_47>",
504
- "single_word": false,
505
- "lstrip": false,
506
- "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
- },
510
- {
511
- "id": 57,
512
- "content": "<extra_id_46>",
513
- "single_word": false,
514
- "lstrip": false,
515
- "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
- },
519
- {
520
- "id": 58,
521
- "content": "<extra_id_45>",
522
- "single_word": false,
523
- "lstrip": false,
524
- "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
- },
528
- {
529
- "id": 59,
530
- "content": "<extra_id_44>",
531
- "single_word": false,
532
- "lstrip": false,
533
- "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
- },
537
- {
538
- "id": 60,
539
- "content": "<extra_id_43>",
540
- "single_word": false,
541
- "lstrip": false,
542
- "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
- },
546
- {
547
- "id": 61,
548
- "content": "<extra_id_42>",
549
- "single_word": false,
550
- "lstrip": false,
551
- "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
- },
555
- {
556
- "id": 62,
557
- "content": "<extra_id_41>",
558
- "single_word": false,
559
- "lstrip": false,
560
- "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
- },
564
- {
565
- "id": 63,
566
- "content": "<extra_id_40>",
567
- "single_word": false,
568
- "lstrip": false,
569
- "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
- },
573
- {
574
- "id": 64,
575
- "content": "<extra_id_39>",
576
- "single_word": false,
577
- "lstrip": false,
578
- "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
- },
582
- {
583
- "id": 65,
584
- "content": "<extra_id_38>",
585
- "single_word": false,
586
- "lstrip": false,
587
- "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
- },
591
- {
592
- "id": 66,
593
- "content": "<extra_id_37>",
594
- "single_word": false,
595
- "lstrip": false,
596
- "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
- },
600
- {
601
- "id": 67,
602
- "content": "<extra_id_36>",
603
- "single_word": false,
604
- "lstrip": false,
605
- "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
- },
609
- {
610
- "id": 68,
611
- "content": "<extra_id_35>",
612
- "single_word": false,
613
- "lstrip": false,
614
- "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
- },
618
- {
619
- "id": 69,
620
- "content": "<extra_id_34>",
621
- "single_word": false,
622
- "lstrip": false,
623
- "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
- },
627
- {
628
- "id": 70,
629
- "content": "<extra_id_33>",
630
- "single_word": false,
631
- "lstrip": false,
632
- "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
- },
636
- {
637
- "id": 71,
638
- "content": "<extra_id_32>",
639
- "single_word": false,
640
- "lstrip": false,
641
- "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
- },
645
- {
646
- "id": 72,
647
- "content": "<extra_id_31>",
648
- "single_word": false,
649
- "lstrip": false,
650
- "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
- },
654
- {
655
- "id": 73,
656
- "content": "<extra_id_30>",
657
- "single_word": false,
658
- "lstrip": false,
659
- "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
- },
663
- {
664
- "id": 74,
665
- "content": "<extra_id_29>",
666
- "single_word": false,
667
- "lstrip": false,
668
- "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
- },
672
- {
673
- "id": 75,
674
- "content": "<extra_id_28>",
675
- "single_word": false,
676
- "lstrip": false,
677
- "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
- },
681
- {
682
- "id": 76,
683
- "content": "<extra_id_27>",
684
- "single_word": false,
685
- "lstrip": false,
686
- "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
- },
690
- {
691
- "id": 77,
692
- "content": "<extra_id_26>",
693
- "single_word": false,
694
- "lstrip": false,
695
- "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
- },
699
- {
700
- "id": 78,
701
- "content": "<extra_id_25>",
702
- "single_word": false,
703
- "lstrip": false,
704
- "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
- },
708
- {
709
- "id": 79,
710
- "content": "<extra_id_24>",
711
- "single_word": false,
712
- "lstrip": false,
713
- "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
- },
717
- {
718
- "id": 80,
719
- "content": "<extra_id_23>",
720
- "single_word": false,
721
- "lstrip": false,
722
- "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
- },
726
- {
727
- "id": 81,
728
- "content": "<extra_id_22>",
729
- "single_word": false,
730
- "lstrip": false,
731
- "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
- },
735
- {
736
- "id": 82,
737
- "content": "<extra_id_21>",
738
- "single_word": false,
739
- "lstrip": false,
740
- "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
- },
744
- {
745
- "id": 83,
746
- "content": "<extra_id_20>",
747
- "single_word": false,
748
- "lstrip": false,
749
- "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
- },
753
- {
754
- "id": 84,
755
- "content": "<extra_id_19>",
756
- "single_word": false,
757
- "lstrip": false,
758
- "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
- },
762
- {
763
- "id": 85,
764
- "content": "<extra_id_18>",
765
- "single_word": false,
766
- "lstrip": false,
767
- "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
- },
771
- {
772
- "id": 86,
773
- "content": "<extra_id_17>",
774
- "single_word": false,
775
- "lstrip": false,
776
- "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
- },
780
- {
781
- "id": 87,
782
- "content": "<extra_id_16>",
783
- "single_word": false,
784
- "lstrip": false,
785
- "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
- },
789
- {
790
- "id": 88,
791
- "content": "<extra_id_15>",
792
- "single_word": false,
793
- "lstrip": false,
794
- "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
- },
798
- {
799
- "id": 89,
800
- "content": "<extra_id_14>",
801
- "single_word": false,
802
- "lstrip": false,
803
- "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
- },
807
- {
808
- "id": 90,
809
- "content": "<extra_id_13>",
810
- "single_word": false,
811
- "lstrip": false,
812
- "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
- },
816
- {
817
- "id": 91,
818
- "content": "<extra_id_12>",
819
- "single_word": false,
820
- "lstrip": false,
821
- "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
- },
825
- {
826
- "id": 92,
827
- "content": "<extra_id_11>",
828
- "single_word": false,
829
- "lstrip": false,
830
- "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
- },
834
- {
835
- "id": 93,
836
- "content": "<extra_id_10>",
837
- "single_word": false,
838
- "lstrip": false,
839
- "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
- },
843
- {
844
- "id": 94,
845
- "content": "<extra_id_9>",
846
- "single_word": false,
847
- "lstrip": false,
848
- "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
- },
852
- {
853
- "id": 95,
854
- "content": "<extra_id_8>",
855
- "single_word": false,
856
- "lstrip": false,
857
- "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
- },
861
- {
862
- "id": 96,
863
- "content": "<extra_id_7>",
864
- "single_word": false,
865
- "lstrip": false,
866
- "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
- },
870
- {
871
- "id": 97,
872
- "content": "<extra_id_6>",
873
- "single_word": false,
874
- "lstrip": false,
875
- "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
- },
879
- {
880
- "id": 98,
881
- "content": "<extra_id_5>",
882
- "single_word": false,
883
- "lstrip": false,
884
- "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
- },
888
- {
889
- "id": 99,
890
- "content": "<extra_id_4>",
891
- "single_word": false,
892
- "lstrip": false,
893
- "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
- },
897
- {
898
- "id": 100,
899
- "content": "<extra_id_3>",
900
- "single_word": false,
901
- "lstrip": false,
902
- "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
- },
906
- {
907
- "id": 101,
908
- "content": "<extra_id_2>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
@@ -913,8 +13,8 @@
913
  "special": true
914
  },
915
  {
916
- "id": 102,
917
- "content": "<extra_id_1>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
@@ -922,8 +22,8 @@
922
  "special": true
923
  },
924
  {
925
- "id": 103,
926
- "content": "<extra_id_0>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
@@ -931,8 +31,8 @@
931
  "special": true
932
  },
933
  {
934
- "id": 104,
935
- "content": "<s>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
@@ -942,500 +42,50 @@
942
  ],
943
  "normalizer": null,
944
  "pre_tokenizer": {
945
- "type": "Sequence",
946
- "pretokenizers": [
947
- {
948
- "type": "WhitespaceSplit"
949
- },
950
- {
951
- "type": "Metaspace",
952
- "replacement": "▁",
953
- "prepend_scheme": "always",
954
- "split": true
955
- }
956
- ]
957
  },
958
- "post_processor": {
959
- "type": "TemplateProcessing",
960
- "single": [
961
- {
962
- "Sequence": {
963
- "id": "A",
964
- "type_id": 0
965
- }
966
- },
967
- {
968
- "SpecialToken": {
969
- "id": "</s>",
970
- "type_id": 0
971
- }
972
- }
973
- ],
974
- "pair": [
975
  {
976
- "Sequence": {
977
- "id": "A",
978
- "type_id": 0
979
- }
 
980
  },
981
  {
982
- "SpecialToken": {
983
- "id": "</s>",
984
- "type_id": 0
985
- }
986
  },
987
  {
988
- "Sequence": {
989
- "id": "B",
990
- "type_id": 0
991
- }
992
  },
993
  {
994
- "SpecialToken": {
995
- "id": "</s>",
996
- "type_id": 0
997
- }
998
- }
999
- ],
1000
- "special_tokens": {
1001
- "</s>": {
1002
- "id": "</s>",
1003
- "ids": [
1004
- 1
1005
- ],
1006
- "tokens": [
1007
- "</s>"
1008
- ]
1009
  }
1010
- }
1011
- },
1012
- "decoder": {
1013
- "type": "Metaspace",
1014
- "replacement": "▁",
1015
- "prepend_scheme": "always",
1016
- "split": true
1017
  },
1018
  "model": {
1019
- "type": "Unigram",
1020
- "unk_id": 2,
1021
- "vocab": [
1022
- [
1023
- "<pad>",
1024
- 0.0
1025
- ],
1026
- [
1027
- "</s>",
1028
- 0.0
1029
- ],
1030
- [
1031
- "<unk>",
1032
- 0.0
1033
- ],
1034
- [
1035
- "▁",
1036
- -2.0
1037
- ],
1038
- [
1039
- "<extra_id_99>",
1040
- 0.0
1041
- ],
1042
- [
1043
- "<extra_id_98>",
1044
- 0.0
1045
- ],
1046
- [
1047
- "<extra_id_97>",
1048
- 0.0
1049
- ],
1050
- [
1051
- "<extra_id_96>",
1052
- 0.0
1053
- ],
1054
- [
1055
- "<extra_id_95>",
1056
- 0.0
1057
- ],
1058
- [
1059
- "<extra_id_94>",
1060
- 0.0
1061
- ],
1062
- [
1063
- "<extra_id_93>",
1064
- 0.0
1065
- ],
1066
- [
1067
- "<extra_id_92>",
1068
- 0.0
1069
- ],
1070
- [
1071
- "<extra_id_91>",
1072
- 0.0
1073
- ],
1074
- [
1075
- "<extra_id_90>",
1076
- 0.0
1077
- ],
1078
- [
1079
- "<extra_id_89>",
1080
- 0.0
1081
- ],
1082
- [
1083
- "<extra_id_88>",
1084
- 0.0
1085
- ],
1086
- [
1087
- "<extra_id_87>",
1088
- 0.0
1089
- ],
1090
- [
1091
- "<extra_id_86>",
1092
- 0.0
1093
- ],
1094
- [
1095
- "<extra_id_85>",
1096
- 0.0
1097
- ],
1098
- [
1099
- "<extra_id_84>",
1100
- 0.0
1101
- ],
1102
- [
1103
- "<extra_id_83>",
1104
- 0.0
1105
- ],
1106
- [
1107
- "<extra_id_82>",
1108
- 0.0
1109
- ],
1110
- [
1111
- "<extra_id_81>",
1112
- 0.0
1113
- ],
1114
- [
1115
- "<extra_id_80>",
1116
- 0.0
1117
- ],
1118
- [
1119
- "<extra_id_79>",
1120
- 0.0
1121
- ],
1122
- [
1123
- "<extra_id_78>",
1124
- 0.0
1125
- ],
1126
- [
1127
- "<extra_id_77>",
1128
- 0.0
1129
- ],
1130
- [
1131
- "<extra_id_76>",
1132
- 0.0
1133
- ],
1134
- [
1135
- "<extra_id_75>",
1136
- 0.0
1137
- ],
1138
- [
1139
- "<extra_id_74>",
1140
- 0.0
1141
- ],
1142
- [
1143
- "<extra_id_73>",
1144
- 0.0
1145
- ],
1146
- [
1147
- "<extra_id_72>",
1148
- 0.0
1149
- ],
1150
- [
1151
- "<extra_id_71>",
1152
- 0.0
1153
- ],
1154
- [
1155
- "<extra_id_70>",
1156
- 0.0
1157
- ],
1158
- [
1159
- "<extra_id_69>",
1160
- 0.0
1161
- ],
1162
- [
1163
- "<extra_id_68>",
1164
- 0.0
1165
- ],
1166
- [
1167
- "<extra_id_67>",
1168
- 0.0
1169
- ],
1170
- [
1171
- "<extra_id_66>",
1172
- 0.0
1173
- ],
1174
- [
1175
- "<extra_id_65>",
1176
- 0.0
1177
- ],
1178
- [
1179
- "<extra_id_64>",
1180
- 0.0
1181
- ],
1182
- [
1183
- "<extra_id_63>",
1184
- 0.0
1185
- ],
1186
- [
1187
- "<extra_id_62>",
1188
- 0.0
1189
- ],
1190
- [
1191
- "<extra_id_61>",
1192
- 0.0
1193
- ],
1194
- [
1195
- "<extra_id_60>",
1196
- 0.0
1197
- ],
1198
- [
1199
- "<extra_id_59>",
1200
- 0.0
1201
- ],
1202
- [
1203
- "<extra_id_58>",
1204
- 0.0
1205
- ],
1206
- [
1207
- "<extra_id_57>",
1208
- 0.0
1209
- ],
1210
- [
1211
- "<extra_id_56>",
1212
- 0.0
1213
- ],
1214
- [
1215
- "<extra_id_55>",
1216
- 0.0
1217
- ],
1218
- [
1219
- "<extra_id_54>",
1220
- 0.0
1221
- ],
1222
- [
1223
- "<extra_id_53>",
1224
- 0.0
1225
- ],
1226
- [
1227
- "<extra_id_52>",
1228
- 0.0
1229
- ],
1230
- [
1231
- "<extra_id_51>",
1232
- 0.0
1233
- ],
1234
- [
1235
- "<extra_id_50>",
1236
- 0.0
1237
- ],
1238
- [
1239
- "<extra_id_49>",
1240
- 0.0
1241
- ],
1242
- [
1243
- "<extra_id_48>",
1244
- 0.0
1245
- ],
1246
- [
1247
- "<extra_id_47>",
1248
- 0.0
1249
- ],
1250
- [
1251
- "<extra_id_46>",
1252
- 0.0
1253
- ],
1254
- [
1255
- "<extra_id_45>",
1256
- 0.0
1257
- ],
1258
- [
1259
- "<extra_id_44>",
1260
- 0.0
1261
- ],
1262
- [
1263
- "<extra_id_43>",
1264
- 0.0
1265
- ],
1266
- [
1267
- "<extra_id_42>",
1268
- 0.0
1269
- ],
1270
- [
1271
- "<extra_id_41>",
1272
- 0.0
1273
- ],
1274
- [
1275
- "<extra_id_40>",
1276
- 0.0
1277
- ],
1278
- [
1279
- "<extra_id_39>",
1280
- 0.0
1281
- ],
1282
- [
1283
- "<extra_id_38>",
1284
- 0.0
1285
- ],
1286
- [
1287
- "<extra_id_37>",
1288
- 0.0
1289
- ],
1290
- [
1291
- "<extra_id_36>",
1292
- 0.0
1293
- ],
1294
- [
1295
- "<extra_id_35>",
1296
- 0.0
1297
- ],
1298
- [
1299
- "<extra_id_34>",
1300
- 0.0
1301
- ],
1302
- [
1303
- "<extra_id_33>",
1304
- 0.0
1305
- ],
1306
- [
1307
- "<extra_id_32>",
1308
- 0.0
1309
- ],
1310
- [
1311
- "<extra_id_31>",
1312
- 0.0
1313
- ],
1314
- [
1315
- "<extra_id_30>",
1316
- 0.0
1317
- ],
1318
- [
1319
- "<extra_id_29>",
1320
- 0.0
1321
- ],
1322
- [
1323
- "<extra_id_28>",
1324
- 0.0
1325
- ],
1326
- [
1327
- "<extra_id_27>",
1328
- 0.0
1329
- ],
1330
- [
1331
- "<extra_id_26>",
1332
- 0.0
1333
- ],
1334
- [
1335
- "<extra_id_25>",
1336
- 0.0
1337
- ],
1338
- [
1339
- "<extra_id_24>",
1340
- 0.0
1341
- ],
1342
- [
1343
- "<extra_id_23>",
1344
- 0.0
1345
- ],
1346
- [
1347
- "<extra_id_22>",
1348
- 0.0
1349
- ],
1350
- [
1351
- "<extra_id_21>",
1352
- 0.0
1353
- ],
1354
- [
1355
- "<extra_id_20>",
1356
- 0.0
1357
- ],
1358
- [
1359
- "<extra_id_19>",
1360
- 0.0
1361
- ],
1362
- [
1363
- "<extra_id_18>",
1364
- 0.0
1365
- ],
1366
- [
1367
- "<extra_id_17>",
1368
- 0.0
1369
- ],
1370
- [
1371
- "<extra_id_16>",
1372
- 0.0
1373
- ],
1374
- [
1375
- "<extra_id_15>",
1376
- 0.0
1377
- ],
1378
- [
1379
- "<extra_id_14>",
1380
- 0.0
1381
- ],
1382
- [
1383
- "<extra_id_13>",
1384
- 0.0
1385
- ],
1386
- [
1387
- "<extra_id_12>",
1388
- 0.0
1389
- ],
1390
- [
1391
- "<extra_id_11>",
1392
- 0.0
1393
- ],
1394
- [
1395
- "<extra_id_10>",
1396
- 0.0
1397
- ],
1398
- [
1399
- "<extra_id_9>",
1400
- 0.0
1401
- ],
1402
- [
1403
- "<extra_id_8>",
1404
- 0.0
1405
- ],
1406
- [
1407
- "<extra_id_7>",
1408
- 0.0
1409
- ],
1410
- [
1411
- "<extra_id_6>",
1412
- 0.0
1413
- ],
1414
- [
1415
- "<extra_id_5>",
1416
- 0.0
1417
- ],
1418
- [
1419
- "<extra_id_4>",
1420
- 0.0
1421
- ],
1422
- [
1423
- "<extra_id_3>",
1424
- 0.0
1425
- ],
1426
- [
1427
- "<extra_id_2>",
1428
- 0.0
1429
- ],
1430
- [
1431
- "<extra_id_1>",
1432
- 0.0
1433
- ],
1434
- [
1435
- "<extra_id_0>",
1436
- 0.0
1437
- ]
1438
- ],
1439
- "byte_fallback": false
1440
  }
1441
  }
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
+ "content": "<unk>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
13
  "special": true
14
  },
15
  {
16
+ "id": 1,
17
+ "content": "<s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 2,
26
+ "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
31
  "special": true
32
  },
33
  {
34
+ "id": 3,
35
+ "content": "<pad>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
42
  ],
43
  "normalizer": null,
44
  "pre_tokenizer": {
45
+ "type": "Metaspace",
46
+ "replacement": "▁",
47
+ "prepend_scheme": "first",
48
+ "split": false
 
 
 
 
 
 
 
 
49
  },
50
+ "post_processor": null,
51
+ "decoder": {
52
+ "type": "Sequence",
53
+ "decoders": [
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  {
55
+ "type": "Replace",
56
+ "pattern": {
57
+ "String": "▁"
58
+ },
59
+ "content": " "
60
  },
61
  {
62
+ "type": "ByteFallback"
 
 
 
63
  },
64
  {
65
+ "type": "Fuse"
 
 
 
66
  },
67
  {
68
+ "type": "Strip",
69
+ "content": " ",
70
+ "start": 1,
71
+ "stop": 0
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
+ ]
 
 
 
 
 
 
74
  },
75
  "model": {
76
+ "type": "BPE",
77
+ "dropout": null,
78
+ "unk_token": null,
79
+ "continuing_subword_prefix": null,
80
+ "end_of_word_suffix": null,
81
+ "fuse_unk": true,
82
+ "byte_fallback": true,
83
+ "ignore_merges": false,
84
+ "vocab": {
85
+ "<unk>": 0,
86
+ "<s>": 1,
87
+ "</s>": 2
88
+ },
89
+ "merges": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }
91
  }
tokenizer_config.json CHANGED
@@ -1,112 +1,5 @@
1
  {
2
  "backend": "tokenizers",
3
- "bos_token": "<s>",
4
- "eos_token": "</s>",
5
- "extra_ids": 100,
6
- "extra_special_tokens": [
7
- "<extra_id_0>",
8
- "<extra_id_1>",
9
- "<extra_id_2>",
10
- "<extra_id_3>",
11
- "<extra_id_4>",
12
- "<extra_id_5>",
13
- "<extra_id_6>",
14
- "<extra_id_7>",
15
- "<extra_id_8>",
16
- "<extra_id_9>",
17
- "<extra_id_10>",
18
- "<extra_id_11>",
19
- "<extra_id_12>",
20
- "<extra_id_13>",
21
- "<extra_id_14>",
22
- "<extra_id_15>",
23
- "<extra_id_16>",
24
- "<extra_id_17>",
25
- "<extra_id_18>",
26
- "<extra_id_19>",
27
- "<extra_id_20>",
28
- "<extra_id_21>",
29
- "<extra_id_22>",
30
- "<extra_id_23>",
31
- "<extra_id_24>",
32
- "<extra_id_25>",
33
- "<extra_id_26>",
34
- "<extra_id_27>",
35
- "<extra_id_28>",
36
- "<extra_id_29>",
37
- "<extra_id_30>",
38
- "<extra_id_31>",
39
- "<extra_id_32>",
40
- "<extra_id_33>",
41
- "<extra_id_34>",
42
- "<extra_id_35>",
43
- "<extra_id_36>",
44
- "<extra_id_37>",
45
- "<extra_id_38>",
46
- "<extra_id_39>",
47
- "<extra_id_40>",
48
- "<extra_id_41>",
49
- "<extra_id_42>",
50
- "<extra_id_43>",
51
- "<extra_id_44>",
52
- "<extra_id_45>",
53
- "<extra_id_46>",
54
- "<extra_id_47>",
55
- "<extra_id_48>",
56
- "<extra_id_49>",
57
- "<extra_id_50>",
58
- "<extra_id_51>",
59
- "<extra_id_52>",
60
- "<extra_id_53>",
61
- "<extra_id_54>",
62
- "<extra_id_55>",
63
- "<extra_id_56>",
64
- "<extra_id_57>",
65
- "<extra_id_58>",
66
- "<extra_id_59>",
67
- "<extra_id_60>",
68
- "<extra_id_61>",
69
- "<extra_id_62>",
70
- "<extra_id_63>",
71
- "<extra_id_64>",
72
- "<extra_id_65>",
73
- "<extra_id_66>",
74
- "<extra_id_67>",
75
- "<extra_id_68>",
76
- "<extra_id_69>",
77
- "<extra_id_70>",
78
- "<extra_id_71>",
79
- "<extra_id_72>",
80
- "<extra_id_73>",
81
- "<extra_id_74>",
82
- "<extra_id_75>",
83
- "<extra_id_76>",
84
- "<extra_id_77>",
85
- "<extra_id_78>",
86
- "<extra_id_79>",
87
- "<extra_id_80>",
88
- "<extra_id_81>",
89
- "<extra_id_82>",
90
- "<extra_id_83>",
91
- "<extra_id_84>",
92
- "<extra_id_85>",
93
- "<extra_id_86>",
94
- "<extra_id_87>",
95
- "<extra_id_88>",
96
- "<extra_id_89>",
97
- "<extra_id_90>",
98
- "<extra_id_91>",
99
- "<extra_id_92>",
100
- "<extra_id_93>",
101
- "<extra_id_94>",
102
- "<extra_id_95>",
103
- "<extra_id_96>",
104
- "<extra_id_97>",
105
- "<extra_id_98>",
106
- "<extra_id_99>"
107
- ],
108
  "model_max_length": 1000000000000000019884624838656,
109
- "pad_token": "<pad>",
110
- "tokenizer_class": "T5Tokenizer",
111
- "unk_token": "<unk>"
112
  }
 
1
  {
2
  "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "model_max_length": 1000000000000000019884624838656,
4
+ "tokenizer_class": "TokenizersBackend"
 
 
5
  }