File size: 55,891 Bytes
2216aae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
# Generated content DO NOT EDIT
class AddedToken:
    """
    Represents a token that can be added to a :class:`~tokenizers.Tokenizer`.
    It can have special options that define the way it should behave.

    Args:
        content (:obj:`str`): The content of the token

        single_word (:obj:`bool`, defaults to :obj:`False`):
            Defines whether this token should only match single words. If :obj:`True`, this
            token will never match inside of a word. For example the token ``ing`` would match
            on ``tokenizing`` if this option is :obj:`False`, but not if it is :obj:`True`.
            The notion of "`inside of a word`" is defined by the word boundaries pattern in
            regular expressions (i.e. the token should start and end with word boundaries).

        lstrip (:obj:`bool`, defaults to :obj:`False`):
            Defines whether this token should strip all potential whitespace on its left side.
            If :obj:`True`, this token will greedily match any whitespace on its left. For
            example if we try to match the token ``[MASK]`` with ``lstrip=True``, in the text
            ``"I saw a [MASK]"``, we would match on ``" [MASK]"``. (Note the space on the left).

        rstrip (:obj:`bool`, defaults to :obj:`False`):
            Defines whether this token should strip all potential whitespace on its right
            side. If :obj:`True`, this token will greedily match any whitespace on its right.
            It works just like :obj:`lstrip` but on the right.

        normalized (:obj:`bool`, defaults to :obj:`True` with :meth:`~tokenizers.Tokenizer.add_tokens` and :obj:`False` with :meth:`~tokenizers.Tokenizer.add_special_tokens`):
            Defines whether this token should match against the normalized version of the input
            text. For example, with the added token ``"yesterday"``, and a normalizer in charge of
            lowercasing the text, the token could be extracted from the input ``"I saw a lion
            Yesterday"``.

        special (:obj:`bool`, defaults to :obj:`False` with :meth:`~tokenizers.Tokenizer.add_tokens` and :obj:`False` with :meth:`~tokenizers.Tokenizer.add_special_tokens`):
            Defines whether this token should be skipped when decoding.

    """
    def __init__(self, content=None, single_word=False, lstrip=False, rstrip=False, normalized=True, special=False):
        pass

    def __getstate__(self):
        """
        Helper for pickle.
        """
        pass

    def __setstate__(self, state):
        """
        Helper for pickle: restore this object from the given :obj:`state`.
        """
        pass

    @property
    def content(self):
        """
        Get the content of this :obj:`AddedToken`
        """
        pass

    @content.setter
    def content(self, value):
        """
        Set the content of this :obj:`AddedToken`

        Args:
            value (:obj:`str`): The new content of the token
        """
        pass

    @property
    def lstrip(self):
        """
        Get the value of the :obj:`lstrip` option
        """
        pass

    @lstrip.setter
    def lstrip(self, value):
        """
        Set the value of the :obj:`lstrip` option

        Args:
            value (:obj:`bool`): The new value of the option
        """
        pass

    @property
    def normalized(self):
        """
        Get the value of the :obj:`normalized` option
        """
        pass

    @normalized.setter
    def normalized(self, value):
        """
        Set the value of the :obj:`normalized` option

        Args:
            value (:obj:`bool`): The new value of the option
        """
        pass

    @property
    def rstrip(self):
        """
        Get the value of the :obj:`rstrip` option
        """
        pass

    @rstrip.setter
    def rstrip(self, value):
        """
        Set the value of the :obj:`rstrip` option

        Args:
            value (:obj:`bool`): The new value of the option
        """
        pass

    @property
    def single_word(self):
        """
        Get the value of the :obj:`single_word` option
        """
        pass

    @single_word.setter
    def single_word(self, value):
        """
        Set the value of the :obj:`single_word` option

        Args:
            value (:obj:`bool`): The new value of the option
        """
        pass

    @property
    def special(self):
        """
        Get the value of the :obj:`special` option
        """
        pass

    @special.setter
    def special(self, value):
        """
        Set the value of the :obj:`special` option

        Args:
            value (:obj:`bool`): The new value of the option
        """
        pass

class Encoding:
    """
    The :class:`~tokenizers.Encoding` represents the output of a :class:`~tokenizers.Tokenizer`.
    """
    def __init__(self):
        pass

    def __getstate__(self):
        """
        Helper for pickle.
        """
        pass

    def __setstate__(self, state):
        """
        Helper for pickle: restore this object from the given :obj:`state`.
        """
        pass

    @property
    def attention_mask(self):
        """
        The attention mask

        This indicates to the LM which tokens should be attended to, and which should not.
        This is especially important when batching sequences, where we need to apply
        padding.

        Returns:
           :obj:`List[int]`: The attention mask
        """
        pass

    @attention_mask.setter
    def attention_mask(self, value):
        """
        Set the attention mask

        Args:
            value (:obj:`List[int]`): The attention mask
        """
        pass

    def char_to_token(self, char_pos, sequence_index=0):
        """
        Get the token that contains the char at the given position in the input sequence.

        Args:
            char_pos (:obj:`int`):
                The position of a char in the input string
            sequence_index (:obj:`int`, defaults to :obj:`0`):
                The index of the sequence that contains the target char

        Returns:
            :obj:`int`: The index of the token that contains this char in the encoded sequence
        """
        pass

    def char_to_word(self, char_pos, sequence_index=0):
        """
        Get the word that contains the char at the given position in the input sequence.

        Args:
            char_pos (:obj:`int`):
                The position of a char in the input string
            sequence_index (:obj:`int`, defaults to :obj:`0`):
                The index of the sequence that contains the target char

        Returns:
            :obj:`int`: The index of the word that contains this char in the input sequence
        """
        pass

    @property
    def ids(self):
        """
        The generated IDs

        The IDs are the main input to a Language Model. They are the token indices,
        the numerical representations that a LM understands.

        Returns:
            :obj:`List[int]`: The list of IDs
        """
        pass

    @ids.setter
    def ids(self, value):
        """
        Set the generated IDs

        Args:
            value (:obj:`List[int]`): The list of IDs
        """
        pass

    @staticmethod
    def merge(encodings, growing_offsets=True):
        """
        Merge the list of encodings into one final :class:`~tokenizers.Encoding`

        Args:
            encodings (A :obj:`List` of :class:`~tokenizers.Encoding`):
                The list of encodings that should be merged into one

            growing_offsets (:obj:`bool`, defaults to :obj:`True`):
                Whether the offsets should accumulate while merging

        Returns:
            :class:`~tokenizers.Encoding`: The resulting Encoding
        """
        pass

    @property
    def n_sequences(self):
        """
        The number of sequences represented

        Returns:
            :obj:`int`: The number of sequences in this :class:`~tokenizers.Encoding`
        """
        pass

    @n_sequences.setter
    def n_sequences(self, value):
        """
        Set the number of sequences represented

        Args:
            value (:obj:`int`): The number of sequences in this :class:`~tokenizers.Encoding`
        """
        pass

    @property
    def offsets(self):
        """
        The offsets associated to each token

        These offsets let you slice the input string, and thus retrieve the original
        part that led to producing the corresponding token.

        Returns:
            A :obj:`List` of :obj:`Tuple[int, int]`: The list of offsets
        """
        pass

    @offsets.setter
    def offsets(self, value):
        """
        Set the offsets associated to each token

        Args:
            value (A :obj:`List` of :obj:`Tuple[int, int]`): The list of offsets
        """
        pass

    @property
    def overflowing(self):
        """
        A :obj:`List` of overflowing :class:`~tokenizers.Encoding`

        When using truncation, the :class:`~tokenizers.Tokenizer` takes care of splitting
        the output into as many pieces as required to match the specified maximum length.
        This field lets you retrieve all the subsequent pieces.

        When you use pairs of sequences, the overflowing pieces will contain enough
        variations to cover all the possible combinations, while respecting the provided
        maximum length.
        """
        pass

    @overflowing.setter
    def overflowing(self, value):
        """
        Set the :obj:`List` of overflowing :class:`~tokenizers.Encoding`

        Args:
            value (A :obj:`List` of :class:`~tokenizers.Encoding`):
                The overflowing pieces
        """
        pass

    def pad(self, length, direction="right", pad_id=0, pad_type_id=0, pad_token="[PAD]"):
        """
        Pad the :class:`~tokenizers.Encoding` to the given length

        Args:
            length (:obj:`int`):
                The desired length

            direction (:obj:`str`, defaults to :obj:`"right"`):
                The expected padding direction. Can be either :obj:`"right"` or :obj:`"left"`

            pad_id (:obj:`int`, defaults to :obj:`0`):
                The ID corresponding to the padding token

            pad_type_id (:obj:`int`, defaults to :obj:`0`):
                The type ID corresponding to the padding token

            pad_token (:obj:`str`, defaults to :obj:`"[PAD]"`):
                The pad token to use
        """
        pass

    @property
    def sequence_ids(self):
        """
        The generated sequence indices.

        They represent the index of the input sequence associated to each token.
        The sequence id can be None if the token is not related to any input sequence,
        like for example with special tokens.

        Returns:
            A :obj:`List` of :obj:`Optional[int]`: A list of optional sequence indices.
        """
        pass

    @sequence_ids.setter
    def sequence_ids(self, value):
        """
        Set the generated sequence indices.

        Args:
            value (A :obj:`List` of :obj:`Optional[int]`):
                A list of optional sequence indices.
        """
        pass

    def set_sequence_id(self, sequence_id):
        """
        Set the given sequence index

        Set the given sequence index for the whole range of tokens contained in this
        :class:`~tokenizers.Encoding`.

        Args:
            sequence_id:
                The sequence index to assign to every token of this
                :class:`~tokenizers.Encoding`
        """
        pass

    @property
    def special_tokens_mask(self):
        """
        The special token mask

        This indicates which tokens are special tokens, and which are not.

        Returns:
            :obj:`List[int]`: The special tokens mask
        """
        pass

    @special_tokens_mask.setter
    def special_tokens_mask(self, value):
        """
        Set the special token mask

        Args:
            value (:obj:`List[int]`): The special tokens mask
        """
        pass

    def token_to_chars(self, token_index):
        """
        Get the offsets of the token at the given index.

        The returned offsets are related to the input sequence that contains the
        token.  In order to determine in which input sequence it belongs, you
        must call :meth:`~tokenizers.Encoding.token_to_sequence()`.

        Args:
            token_index (:obj:`int`):
                The index of a token in the encoded sequence.

        Returns:
            :obj:`Tuple[int, int]`: The token offsets :obj:`(first, last + 1)`
        """
        pass

    def token_to_sequence(self, token_index):
        """
        Get the index of the sequence represented by the given token.

        In the general use case, this method returns :obj:`0` for a single sequence or
        the first sequence of a pair, and :obj:`1` for the second sequence of a pair.

        Args:
            token_index (:obj:`int`):
                The index of a token in the encoded sequence.

        Returns:
            :obj:`int`: The sequence id of the given token
        """
        pass

    def token_to_word(self, token_index):
        """
        Get the index of the word that contains the token in one of the input sequences.

        The returned word index is related to the input sequence that contains
        the token.  In order to determine in which input sequence it belongs, you
        must call :meth:`~tokenizers.Encoding.token_to_sequence()`.

        Args:
            token_index (:obj:`int`):
                The index of a token in the encoded sequence.

        Returns:
            :obj:`int`: The index of the word in the relevant input sequence.
        """
        pass

    @property
    def tokens(self):
        """
        The generated tokens

        They are the string representation of the IDs.

        Returns:
            :obj:`List[str]`: The list of tokens
        """
        pass

    @tokens.setter
    def tokens(self, value):
        """
        Set the generated tokens

        Args:
            value (:obj:`List[str]`): The list of tokens
        """
        pass

    def truncate(self, max_length, stride=0, direction="right"):
        """
        Truncate the :class:`~tokenizers.Encoding` at the given length

        If this :class:`~tokenizers.Encoding` represents multiple sequences, when truncating
        this information is lost. It will be considered as representing a single sequence.

        Args:
            max_length (:obj:`int`):
                The desired length

            stride (:obj:`int`, defaults to :obj:`0`):
                The length of previous content to be included in each overflowing piece

            direction (:obj:`str`, defaults to :obj:`"right"`):
                Truncate direction
        """
        pass

    @property
    def type_ids(self):
        """
        The generated type IDs

        Generally used for tasks like sequence classification or question answering,
        these tokens let the LM know which input sequence corresponds to each token.

        Returns:
            :obj:`List[int]`: The list of type ids
        """
        pass

    @type_ids.setter
    def type_ids(self, value):
        """
        Set the generated type IDs

        Args:
            value (:obj:`List[int]`): The list of type ids
        """
        pass

    @property
    def word_ids(self):
        """
        The generated word indices.

        They represent the index of the word associated to each token.
        When the input is pre-tokenized, they correspond to the ID of the given input label,
        otherwise they correspond to the word indices as defined by the
        :class:`~tokenizers.pre_tokenizers.PreTokenizer` that was used.

        For special tokens and such (any token that was generated from something that was
        not part of the input), the output is :obj:`None`

        Returns:
            A :obj:`List` of :obj:`Optional[int]`: A list of optional word indices.
        """
        pass

    @word_ids.setter
    def word_ids(self, value):
        """
        Set the generated word indices.

        Args:
            value (A :obj:`List` of :obj:`Optional[int]`):
                A list of optional word indices.
        """
        pass

    def word_to_chars(self, word_index, sequence_index=0):
        """
        Get the offsets of the word at the given index in one of the input sequences.

        Args:
            word_index (:obj:`int`):
                The index of a word in one of the input sequences.
            sequence_index (:obj:`int`, defaults to :obj:`0`):
                The index of the sequence that contains the target word

        Returns:
            :obj:`Tuple[int, int]`: The range of characters (span) :obj:`(first, last + 1)`
        """
        pass

    def word_to_tokens(self, word_index, sequence_index=0):
        """
        Get the encoded tokens corresponding to the word at the given index
        in one of the input sequences.

        Args:
            word_index (:obj:`int`):
                The index of a word in one of the input sequences.
            sequence_index (:obj:`int`, defaults to :obj:`0`):
                The index of the sequence that contains the target word

        Returns:
            :obj:`Tuple[int, int]`: The range of tokens: :obj:`(first, last + 1)`
        """
        pass

    @property
    def words(self):
        """
        The generated word indices.

        .. warning::
            This is deprecated and will be removed in a future version.
            Please use :obj:`~tokenizers.Encoding.word_ids` instead.

        They represent the index of the word associated to each token.
        When the input is pre-tokenized, they correspond to the ID of the given input label,
        otherwise they correspond to the word indices as defined by the
        :class:`~tokenizers.pre_tokenizers.PreTokenizer` that was used.

        For special tokens and such (any token that was generated from something that was
        not part of the input), the output is :obj:`None`

        Returns:
            A :obj:`List` of :obj:`Optional[int]`: A list of optional word indices.
        """
        pass

    @words.setter
    def words(self, value):
        """
        Set the generated word indices.

        .. warning::
            This is deprecated and will be removed in a future version.
            Please use :obj:`~tokenizers.Encoding.word_ids` instead.

        Args:
            value (A :obj:`List` of :obj:`Optional[int]`):
                A list of optional word indices.
        """
        pass

class NormalizedString:
    """
    NormalizedString

    A NormalizedString takes care of modifying an "original" string, to obtain a "normalized" one.
    While making all the requested modifications, it keeps track of the alignment information
    between the two versions of the string.

    Args:
        sequence: str:
            The string sequence used to initialize this NormalizedString
    """
    def __init__(self, sequence):
        """
        Create a NormalizedString from ``sequence``

        Args:
            sequence: str:
                The string sequence used to initialize this NormalizedString
        """
        pass

    def __getitem__(self, key):
        """
        Return self[key].
        """
        pass

    def __getstate__(self, /):
        """
        Helper for pickle.
        """
        pass

    def append(self, s):
        """
        Append the given sequence to the string

        Args:
            s:
                The sequence to append (presumably a str, TODO confirm)
        """
        pass

    def clear(self):
        """
        Clears the string
        """
        pass

    def filter(self, func):
        """
        Filter each character of the string using the given func

        Args:
            func:
                The callable applied to each character
        """
        pass

    def for_each(self, func):
        """
        Calls the given function for each character of the string

        Args:
            func:
                The callable invoked with each character
        """
        pass

    def lowercase(self):
        """
        Lowercase the string
        """
        pass

    def lstrip(self):
        """
        Strip the left of the string
        """
        pass

    def map(self, func):
        """
        Calls the given function for each character of the string

        Replaces each character of the string using the returned value. Each
        returned value **must** be a str of length 1 (i.e. a character).

        Args:
            func:
                The callable invoked with each character; its return value
                replaces that character
        """
        pass

    def nfc(self):
        """
        Runs the NFC normalization
        """
        pass

    def nfd(self):
        """
        Runs the NFD normalization
        """
        pass

    def nfkc(self):
        """
        Runs the NFKC normalization
        """
        pass

    def nfkd(self):
        """
        Runs the NFKD normalization
        """
        pass

    @property
    def normalized(self):
        """
        The normalized part of the string
        """
        pass

    @normalized.setter
    def normalized(self, value):
        """
        The normalized part of the string
        """
        pass

    @property
    def original(self):
        """
        The original part of the string

        Presumably the "original" version mentioned in the class description,
        as opposed to :obj:`normalized` (TODO confirm).
        """
        pass

    @original.setter
    def original(self, value):
        """
        The original part of the string

        Presumably the "original" version mentioned in the class description,
        as opposed to :obj:`normalized` (TODO confirm).
        """
        pass

    def prepend(self, s):
        """
        Prepend the given sequence to the string

        Args:
            s:
                The sequence to prepend (presumably a str, TODO confirm)
        """
        pass

    def replace(self, pattern, content):
        """
        Replace the content of the given pattern with the provided content

        Args:
            pattern: Pattern:
                A pattern used to match the string. Usually a string or a Regex

            content: str:
                The content to be used as replacement
        """
        pass

    def rstrip(self):
        """
        Strip the right of the string
        """
        pass

    def slice(self, range):
        """
        Slice the string using the given range

        Args:
            range:
                The range to slice with. Its exact type and whether it is expressed
                in chars or bytes is not visible from this stub (TODO confirm).
        """
        pass

    def split(self, pattern, behavior):
        """
        Split the NormalizedString using the given pattern and the specified behavior

        Args:
            pattern: Pattern:
                A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`

            behavior: SplitDelimiterBehavior:
                The behavior to use when splitting.
                Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",
                "contiguous"

        Returns:
            A list of NormalizedString, representing each split
        """
        pass

    def strip(self):
        """
        Strip both ends of the string
        """
        pass

    def uppercase(self):
        """
        Uppercase the string
        """
        pass

class PreTokenizedString:
    """
    PreTokenizedString

    Wrapper over a string, that provides a way to normalize, pre-tokenize, tokenize the
    underlying string, while keeping track of the alignment information (offsets).

    The PreTokenizedString manages what we call `splits`. Each split represents a substring
    which is a subpart of the original string, with the relevant offsets and tokens.

    When calling one of the methods used to modify the PreTokenizedString (namely one of
    `split`, `normalize` or `tokenize`), only the `splits` that don't have any associated
    tokens will get modified.

    Args:
        sequence: str:
            The string sequence used to initialize this PreTokenizedString
    """
    def __init__(self, sequence):
        """
        Create a PreTokenizedString from ``sequence``

        Args:
            sequence: str:
                The string sequence used to initialize this PreTokenizedString
        """
        pass

    def __getstate__(self, /):
        """
        Helper for pickle.
        """
        pass

    def get_splits(self, offset_referential="original", offset_type="char"):
        """
        Get the splits currently managed by the PreTokenizedString

        Args:
            offset_referential: :obj:`str`
                Whether the returned splits should have offsets expressed relative
                to the original string, or the normalized one. choices: "original", "normalized".

            offset_type: :obj:`str`
                Whether the returned splits should have offsets expressed in bytes or chars.
                When slicing a str, we usually want to use chars, which is the default value.
                Now in some cases it might be interesting to get these offsets expressed in bytes,
                so it is possible to change this here.
                choices: "char", "byte"

                NOTE(review): this used to read "bytes"; the ``OffsetType`` enum declared
                in this module spells the value "byte". Confirm against the native
                implementation.

        Returns:
            A list of splits
        """
        pass

    def normalize(self, func):
        """
        Normalize each split of the `PreTokenizedString` using the given `func`

        Args:
            func: Callable[[NormalizedString], None]:
                The function used to normalize each underlying split. This function
                does not need to return anything, just calling the methods on the provided
                NormalizedString allows its modification.
        """
        pass

    def split(self, func):
        """
        Split the PreTokenizedString using the given `func`

        Args:
            func: Callable[[index, NormalizedString], List[NormalizedString]]:
                The function used to split each underlying split.
                It is expected to return a list of `NormalizedString`, that represent the new
                splits. If the given `NormalizedString` does not need any splitting, we can
                just return it directly.
                In order for the offsets to be tracked accurately, any returned `NormalizedString`
                should come from calling either `.split` or `.slice` on the received one.
        """
        pass

    def to_encoding(self, type_id=0, word_idx=None):
        """
        Return an Encoding generated from this PreTokenizedString

        Args:
            type_id: int = 0:
                The type_id to be used on the generated Encoding.

            word_idx: Optional[int] = None:
                An optional word index to be used for each token of this Encoding. If provided,
                all the word indices in the generated Encoding will use this value, instead
                of the one automatically tracked during pre-tokenization.

        Returns:
            An Encoding
        """
        pass

    def tokenize(self, func):
        """
        Tokenize each split of the `PreTokenizedString` using the given `func`

        Args:
            func: Callable[[str], List[Token]]:
                The function used to tokenize each underlying split. This function must return
                a list of Token generated from the input str.
        """
        pass

class Regex:
    """
    Instantiate a new Regex with the given pattern

    A Regex is one of the things accepted where a ``Pattern`` is expected, e.g. by
    ``NormalizedString.split`` and ``NormalizedString.replace``.

    Args:
        pattern:
            The pattern to build the Regex from (presumably a :obj:`str`, TODO confirm)
    """
    def __init__(self, pattern):
        pass

    def __getstate__(self, /):
        """
        Helper for pickle.
        """
        pass

class Token:
    """
    A token, built from an ``id``, a ``value`` and ``offsets``.

    The callback given to ``PreTokenizedString.tokenize`` is documented as returning
    a list of these.

    Args:
        id:
            The id of the token (presumably an int vocabulary id, TODO confirm)

        value:
            The value of the token (presumably its str content, TODO confirm)

        offsets:
            The offsets of the token (presumably a pair of ints like the ``Offsets``
            alias of this module, TODO confirm)
    """
    def __init__(self, id, value, offsets):
        pass

    def __getstate__(self, /):
        """
        Helper for pickle.
        """
        pass

    def as_tuple(self):
        """
        Return this token as a tuple

        NOTE(review): the layout is presumably ``(id, value, offsets)``, matching the
        constructor order; not visible from this stub, confirm before relying on it.
        """
        pass

    @property
    def id(self):
        """
        The ``id`` of this token
        """
        pass

    @id.setter
    def id(self, value):
        """
        The ``id`` of this token
        """
        pass

    @property
    def offsets(self):
        """
        The ``offsets`` of this token
        """
        pass

    @offsets.setter
    def offsets(self, value):
        """
        The ``offsets`` of this token
        """
        pass

    @property
    def value(self):
        """
        The ``value`` of this token
        """
        pass

    @value.setter
    def value(self, value):
        """
        The ``value`` of this token
        """
        pass

class Tokenizer:
    """
    A :obj:`Tokenizer` works as a pipeline. It processes some raw text as input
    and outputs an :class:`~tokenizers.Encoding`.

    Args:
        model (:class:`~tokenizers.models.Model`):
            The core algorithm that this :obj:`Tokenizer` should be using.

    """
    def __init__(self, model):
        pass

    def __getnewargs__(self):
        """
        Helper for pickle (standard ``__getnewargs__`` protocol hook).
        """
        pass

    def __getstate__(self):
        """
        Helper for pickle (standard ``__getstate__`` protocol hook).
        """
        pass

    def __setstate__(self, state):
        """
        Helper for pickle (standard ``__setstate__`` protocol hook).
        """
        pass

    def add_special_tokens(self, tokens):
        """
        Add the given special tokens to the Tokenizer.

        If these tokens are already part of the vocabulary, it just lets the Tokenizer know about
        them. If they don't exist, the Tokenizer creates them, giving them a new id.

        These special tokens will never be processed by the model (i.e. won't be split into
        multiple tokens), and they can be removed from the output when decoding.

        Args:
            tokens (A :obj:`List` of :class:`~tokenizers.AddedToken` or :obj:`str`):
                The list of special tokens we want to add to the vocabulary. Each token can either
                be a string or an instance of :class:`~tokenizers.AddedToken` for more
                customization.

        Returns:
            :obj:`int`: The number of tokens that were created in the vocabulary
        """
        pass

    def add_tokens(self, tokens):
        """
        Add the given tokens to the vocabulary

        The given tokens are added only if they don't already exist in the vocabulary.
        Each token then gets a newly attributed id.

        Args:
            tokens (A :obj:`List` of :class:`~tokenizers.AddedToken` or :obj:`str`):
                The list of tokens we want to add to the vocabulary. Each token can be either a
                string or an instance of :class:`~tokenizers.AddedToken` for more customization.

        Returns:
            :obj:`int`: The number of tokens that were created in the vocabulary
        """
        pass

    def async_decode_batch(self, sequences, skip_special_tokens=True):
        """
        Decode a batch of ids back to their corresponding string

        NOTE(review): judging by its name and by the sibling ``async_encode*`` methods,
        this is presumably the awaitable counterpart of
        :meth:`~tokenizers.Tokenizer.decode_batch`; confirm against the native
        implementation.

        Args:
            sequences (:obj:`List` of :obj:`List[int]`):
                The batch of sequences we want to decode

            skip_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether the special tokens should be removed from the decoded strings

        Returns:
            :obj:`List[str]`: A list of decoded strings
        """
        pass

    def async_encode(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True):
        """
        Asynchronously encode the given input with character offsets.

        This is an async version of encode that can be awaited in async Python code.

        Example:
            Here are some examples of the inputs that are accepted::

                await async_encode("A single sequence")

        Args:
            sequence (:obj:`~tokenizers.InputSequence`):
                The main input sequence we want to encode. This sequence can be either raw
                text or pre-tokenized, according to the ``is_pretokenized`` argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextInputSequence`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedInputSequence`

            pair (:obj:`~tokenizers.InputSequence`, `optional`):
                An optional input sequence. The expected format is the same as for ``sequence``.

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            :class:`~tokenizers.Encoding`: The encoded result

        """
        pass

    def async_encode_batch(self, input, is_pretokenized=False, add_special_tokens=True):
        """
        Asynchronously encode the given batch of inputs with character offsets.

        This is an async version of encode_batch that can be awaited in async Python code.

        Example:
            Here are some examples of the inputs that are accepted::

                await async_encode_batch([
                    "A single sequence",
                    ("A tuple with a sequence", "And its pair"),
                    [ "A", "pre", "tokenized", "sequence" ],
                    ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
                ])

        Args:
            input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
                A list of single sequences or pair sequences to encode. Each sequence
                can be either raw text or pre-tokenized, according to the ``is_pretokenized``
                argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch

        """
        pass

    def async_encode_batch_fast(self, input, is_pretokenized=False, add_special_tokens=True):
        """
        Asynchronously encode the given batch of inputs without tracking character offsets.

        This is an async version of encode_batch_fast that can be awaited in async Python code.

        Example:
            Here are some examples of the inputs that are accepted::

                await async_encode_batch_fast([
                    "A single sequence",
                    ("A tuple with a sequence", "And its pair"),
                    [ "A", "pre", "tokenized", "sequence" ],
                    ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
                ])

        Args:
            input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
                A list of single sequences or pair sequences to encode. Each sequence
                can be either raw text or pre-tokenized, according to the ``is_pretokenized``
                argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch

        """
        pass

    def decode(self, ids, skip_special_tokens=True):
        """
        Decode the given list of ids back to a string

        This is used to decode anything coming back from a Language Model

        Args:
            ids (A :obj:`List/Tuple` of :obj:`int`):
                The list of ids that we want to decode

            skip_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether the special tokens should be removed from the decoded string

        Returns:
            :obj:`str`: The decoded string
        """
        pass

    def decode_batch(self, sequences, skip_special_tokens=True):
        """
        Decode a batch of ids back to their corresponding string

        Args:
            sequences (:obj:`List` of :obj:`List[int]`):
                The batch of sequences we want to decode

            skip_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether the special tokens should be removed from the decoded strings

        Returns:
            :obj:`List[str]`: A list of decoded strings
        """
        pass

    @property
    def decoder(self):
        """
        The `optional` :class:`~tokenizers.decoders.Decoder` in use by the Tokenizer
        """
        pass

    @decoder.setter
    def decoder(self, value):
        """
        The `optional` :class:`~tokenizers.decoders.Decoder` in use by the Tokenizer
        """
        pass

    def enable_padding(
        self, direction="right", pad_id=0, pad_type_id=0, pad_token="[PAD]", length=None, pad_to_multiple_of=None
    ):
        """
        Enable the padding

        Args:
            direction (:obj:`str`, `optional`, defaults to :obj:`right`):
                The direction in which to pad. Can be either ``right`` or ``left``

            pad_id (:obj:`int`, defaults to 0):
                The id to be used when padding

            pad_type_id (:obj:`int`, defaults to 0):
                The type id to be used when padding

            pad_token (:obj:`str`, defaults to :obj:`[PAD]`):
                The pad token to be used when padding

            length (:obj:`int`, `optional`):
                If specified, the length at which to pad. If not specified we pad using the size of
                the longest sequence in a batch.

            pad_to_multiple_of (:obj:`int`, `optional`):
                If specified, the padding length should always snap to the next multiple of the
                given value. For example if we were going to pad with a length of 250 but
                ``pad_to_multiple_of=8`` then we will pad to 256.
        """
        pass

    def enable_truncation(self, max_length, stride=0, strategy="longest_first", direction="right"):
        """
        Enable truncation

        Args:
            max_length (:obj:`int`):
                The max length at which to truncate

            stride (:obj:`int`, `optional`, defaults to 0):
                The length of the previous first sequence to be included in the overflowing
                sequence

            strategy (:obj:`str`, `optional`, defaults to :obj:`longest_first`):
                The strategy used for truncation. Can be one of ``longest_first``, ``only_first`` or
                ``only_second``.

            direction (:obj:`str`, defaults to :obj:`right`):
                Truncate direction
        """
        pass

    def encode(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True):
        """
        Encode the given sequence and pair. This method can process raw text sequences
        as well as already pre-tokenized sequences.

        Example:
            Here are some examples of the inputs that are accepted::

                encode("A single sequence")
                encode("A sequence", "And its pair")
                encode([ "A", "pre", "tokenized", "sequence" ], is_pretokenized=True)
                encode(
                    [ "A", "pre", "tokenized", "sequence" ], [ "And", "its", "pair" ],
                    is_pretokenized=True
                )

        Args:
            sequence (:obj:`~tokenizers.InputSequence`):
                The main input sequence we want to encode. This sequence can be either raw
                text or pre-tokenized, according to the ``is_pretokenized`` argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextInputSequence`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedInputSequence`

            pair (:obj:`~tokenizers.InputSequence`, `optional`):
                An optional input sequence. The expected format is the same as for ``sequence``.

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            :class:`~tokenizers.Encoding`: The encoded result

        """
        pass

    def encode_batch(self, input, is_pretokenized=False, add_special_tokens=True):
        """
        Encode the given batch of inputs. This method accepts both raw text sequences
        as well as already pre-tokenized sequences. The reason we use `PySequence` is
        because it allows type checking with zero-cost (according to PyO3) as we don't
        have to convert to check.

        Example:
            Here are some examples of the inputs that are accepted::

                encode_batch([
                    "A single sequence",
                    ("A tuple with a sequence", "And its pair"),
                    [ "A", "pre", "tokenized", "sequence" ],
                    ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
                ])

        Args:
            input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
                A list of single sequences or pair sequences to encode. Each sequence
                can be either raw text or pre-tokenized, according to the ``is_pretokenized``
                argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch

        """
        pass

    def encode_batch_fast(self, input, is_pretokenized=False, add_special_tokens=True):
        """
        Encode the given batch of inputs. This method is faster than `encode_batch`
        because it doesn't keep track of offsets, they will be all zeros.

        Example:
            Here are some examples of the inputs that are accepted::

                encode_batch_fast([
                    "A single sequence",
                    ("A tuple with a sequence", "And its pair"),
                    [ "A", "pre", "tokenized", "sequence" ],
                    ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
                ])

        Args:
            input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
                A list of single sequences or pair sequences to encode. Each sequence
                can be either raw text or pre-tokenized, according to the ``is_pretokenized``
                argument:

                - If ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
                - If ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

            is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
                Whether the input is already pre-tokenized

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch

        """
        pass

    @property
    def encode_special_tokens(self):
        """
        Modifies the tokenizer in order to use or not the special tokens
        during encoding.

        Args:
            value (:obj:`bool`):
                Whether to use the special tokens or not

        """
        pass

    @encode_special_tokens.setter
    def encode_special_tokens(self, value):
        """
        Modifies the tokenizer in order to use or not the special tokens
        during encoding.

        Args:
            value (:obj:`bool`):
                Whether to use the special tokens or not

        """
        pass

    @staticmethod
    def from_buffer(buffer):
        """
        Instantiate a new :class:`~tokenizers.Tokenizer` from the given buffer.

        Args:
            buffer (:obj:`bytes`):
                A buffer containing a previously serialized :class:`~tokenizers.Tokenizer`

        Returns:
            :class:`~tokenizers.Tokenizer`: The new tokenizer
        """
        pass

    @staticmethod
    def from_file(path):
        """
        Instantiate a new :class:`~tokenizers.Tokenizer` from the file at the given path.

        Args:
            path (:obj:`str`):
                A path to a local JSON file representing a previously serialized
                :class:`~tokenizers.Tokenizer`

        Returns:
            :class:`~tokenizers.Tokenizer`: The new tokenizer
        """
        pass

    @staticmethod
    def from_pretrained(identifier, revision="main", token=None):
        """
        Instantiate a new :class:`~tokenizers.Tokenizer` from an existing file on the
        Hugging Face Hub.

        Args:
            identifier (:obj:`str`):
                The identifier of a Model on the Hugging Face Hub, that contains
                a tokenizer.json file
            revision (:obj:`str`, defaults to `main`):
                A branch or commit id
            token (:obj:`str`, `optional`, defaults to `None`):
                An optional auth token used to access private repositories on the
                Hugging Face Hub

        Returns:
            :class:`~tokenizers.Tokenizer`: The new tokenizer
        """
        pass

    @staticmethod
    def from_str(json):
        """
        Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.

        Args:
            json (:obj:`str`):
                A valid JSON string representing a previously serialized
                :class:`~tokenizers.Tokenizer`

        Returns:
            :class:`~tokenizers.Tokenizer`: The new tokenizer
        """
        pass

    def get_added_tokens_decoder(self):
        """
        Get the added tokens, indexed by id

        Unlike :meth:`~tokenizers.Tokenizer.get_vocab`, which maps token strings to ids,
        the documented return type here maps ids to ``AddedToken`` objects.

        Returns:
            :obj:`Dict[int, AddedToken]`: The added tokens, keyed by their id
        """
        pass

    def get_vocab(self, with_added_tokens=True):
        """
        Get the underlying vocabulary

        Args:
            with_added_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to include the added tokens

        Returns:
            :obj:`Dict[str, int]`: The vocabulary
        """
        pass

    def get_vocab_size(self, with_added_tokens=True):
        """
        Get the size of the underlying vocabulary

        Args:
            with_added_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to include the added tokens

        Returns:
            :obj:`int`: The size of the vocabulary
        """
        pass

    def id_to_token(self, id):
        """
        Convert the given id to its corresponding token if it exists

        Args:
            id (:obj:`int`):
                The id to convert

        Returns:
            :obj:`Optional[str]`: An optional token, :obj:`None` if out of vocabulary
        """
        pass

    @property
    def model(self):
        """
        The :class:`~tokenizers.models.Model` in use by the Tokenizer
        """
        pass

    @model.setter
    def model(self, value):
        """
        The :class:`~tokenizers.models.Model` in use by the Tokenizer
        """
        pass

    def no_padding(self):
        """
        Disable padding
        """
        pass

    def no_truncation(self):
        """
        Disable truncation
        """
        pass

    @property
    def normalizer(self):
        """
        The `optional` :class:`~tokenizers.normalizers.Normalizer` in use by the Tokenizer
        """
        pass

    @normalizer.setter
    def normalizer(self, value):
        """
        The `optional` :class:`~tokenizers.normalizers.Normalizer` in use by the Tokenizer
        """
        pass

    def num_special_tokens_to_add(self, is_pair):
        """
        Return the number of special tokens that would be added for single/pair sentences.

        Args:
            is_pair (:obj:`bool`):
                Boolean indicating if the input would be a single sentence or a pair

        Returns:
            The number of special tokens that would be added (presumably an :obj:`int`)
        """
        pass

    @property
    def padding(self):
        """
        Get the current padding parameters

        `Cannot be set, use` :meth:`~tokenizers.Tokenizer.enable_padding` `instead`

        Returns:
            (:obj:`dict`, `optional`):
                A dict with the current padding parameters if padding is enabled
        """
        pass

    @padding.setter
    def padding(self, value):
        """
        Get the current padding parameters

        `Cannot be set, use` :meth:`~tokenizers.Tokenizer.enable_padding` `instead`

        .. note::
            This docstring mirrors the getter's. As stated above, the padding parameters
            are meant to be changed through :meth:`~tokenizers.Tokenizer.enable_padding`;
            how a direct assignment behaves is not visible from this stub.

        Returns:
            (:obj:`dict`, `optional`):
                A dict with the current padding parameters if padding is enabled
        """
        pass

    def post_process(self, encoding, pair=None, add_special_tokens=True):
        """
        Apply all the post-processing steps to the given encodings.

        The various steps are:

            1. Truncate according to the set truncation params (provided with
               :meth:`~tokenizers.Tokenizer.enable_truncation`)
            2. Apply the :class:`~tokenizers.processors.PostProcessor`
            3. Pad according to the set padding params (provided with
               :meth:`~tokenizers.Tokenizer.enable_padding`)

        Args:
            encoding (:class:`~tokenizers.Encoding`):
                The :class:`~tokenizers.Encoding` corresponding to the main sequence.

            pair (:class:`~tokenizers.Encoding`, `optional`):
                An optional :class:`~tokenizers.Encoding` corresponding to the pair sequence.

            add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
                Whether to add the special tokens

        Returns:
            :class:`~tokenizers.Encoding`: The final post-processed encoding
        """
        pass

    @property
    def post_processor(self):
        """
        The `optional` :class:`~tokenizers.processors.PostProcessor` in use by the Tokenizer
        """
        pass

    @post_processor.setter
    def post_processor(self, value):
        """
        The `optional` :class:`~tokenizers.processors.PostProcessor` in use by the Tokenizer
        """
        pass

    @property
    def pre_tokenizer(self):
        """
        The `optional` :class:`~tokenizers.pre_tokenizers.PreTokenizer` in use by the Tokenizer
        """
        pass

    @pre_tokenizer.setter
    def pre_tokenizer(self, value):
        """
        The `optional` :class:`~tokenizers.pre_tokenizers.PreTokenizer` in use by the Tokenizer
        """
        pass

    def save(self, path, pretty=True):
        """
        Save the :class:`~tokenizers.Tokenizer` to the file at the given path.

        Args:
            path (:obj:`str`):
                A path to a file in which to save the serialized tokenizer.

            pretty (:obj:`bool`, defaults to :obj:`True`):
                Whether the JSON file should be pretty formatted.
        """
        pass

    def to_str(self, pretty=False):
        """
        Gets a serialized string representing this :class:`~tokenizers.Tokenizer`.

        Args:
            pretty (:obj:`bool`, defaults to :obj:`False`):
                Whether the JSON string should be pretty formatted.

        Returns:
            :obj:`str`: A string representing the serialized Tokenizer
        """
        pass

    def token_to_id(self, token):
        """
        Convert the given token to its corresponding id if it exists

        Args:
            token (:obj:`str`):
                The token to convert

        Returns:
            :obj:`Optional[int]`: An optional id, :obj:`None` if out of vocabulary
        """
        pass

    def train(self, files, trainer=None):
        """
        Train the Tokenizer using the given files.

        Reads the files line by line, while keeping all the whitespace, even new lines.
        If you want to train from data stored in memory, you can check
        :meth:`~tokenizers.Tokenizer.train_from_iterator`

        Args:
            files (:obj:`List[str]`):
                A list of paths to the files that we should use for training

            trainer (:obj:`~tokenizers.trainers.Trainer`, `optional`):
                An optional trainer that should be used to train our Model
        """
        pass

    def train_from_iterator(self, iterator, trainer=None, length=None):
        """
        Train the Tokenizer using the provided iterator.

        You can provide anything that is a Python Iterator

            * A list of sequences :obj:`List[str]`
            * A generator that yields :obj:`str` or :obj:`List[str]`
            * A Numpy array of strings
            * ...

        Args:
            iterator (:obj:`Iterator`):
                Any iterator over strings or list of strings

            trainer (:obj:`~tokenizers.trainers.Trainer`, `optional`):
                An optional trainer that should be used to train our Model

            length (:obj:`int`, `optional`):
                The total number of sequences in the iterator. This is used to
                provide meaningful progress tracking
        """
        pass

    @property
    def truncation(self):
        """
        Get the currently set truncation parameters

        `Cannot set, use` :meth:`~tokenizers.Tokenizer.enable_truncation` `instead`

        Returns:
            (:obj:`dict`, `optional`):
                A dict with the current truncation parameters if truncation is enabled
        """
        pass

    @truncation.setter
    def truncation(self, value):
        """
        Get the currently set truncation parameters

        `Cannot set, use` :meth:`~tokenizers.Tokenizer.enable_truncation` `instead`

        .. note::
            This docstring mirrors the getter's. As stated above, the truncation parameters
            are meant to be changed through :meth:`~tokenizers.Tokenizer.enable_truncation`;
            how a direct assignment behaves is not visible from this stub.

        Returns:
            (:obj:`dict`, `optional`):
                A dict with the current truncation parameters if truncation is enabled
        """
        pass

from enum import Enum
from typing import List, Tuple, Union, Any

# Type aliases for the input shapes named in the Tokenizer encode* docstrings.

# A pair of integer offsets (presumably (start, end); TODO confirm the convention).
Offsets = Tuple[int, int]
# A raw text sequence.
TextInputSequence = str
# A sequence that is already split into pieces: a list or a tuple of strings.
PreTokenizedInputSequence = Union[List[str], Tuple[str, ...]]
# One raw-text item: a single sequence, a 2-tuple of sequences, or a list of sequences.
TextEncodeInput = Union[
    TextInputSequence,
    Tuple[TextInputSequence, TextInputSequence],
    List[TextInputSequence],
]
# Same three shapes as TextEncodeInput, built from pre-tokenized sequences.
PreTokenizedEncodeInput = Union[
    PreTokenizedInputSequence,
    Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence],
    List[PreTokenizedInputSequence],
]
# Either kind of single sequence.
InputSequence = Union[TextInputSequence, PreTokenizedInputSequence]
# Either kind of encode input.
EncodeInput = Union[TextEncodeInput, PreTokenizedEncodeInput]

class OffsetReferential(Enum):
    """
    String-valued choices for an offset referential.

    The values match the ``offset_referential`` choices documented on
    ``PreTokenizedString.get_splits`` ("original", "normalized").
    """

    ORIGINAL = "original"
    NORMALIZED = "normalized"

class OffsetType(Enum):
    """
    String-valued choices for the unit offsets are expressed in.

    Presumably the enum counterpart of the ``offset_type`` argument of
    ``PreTokenizedString.get_splits`` (TODO confirm).
    """

    BYTE = "byte"
    CHAR = "char"

class SplitDelimiterBehavior(Enum):
    """
    String-valued choices for the behavior to use when splitting.

    The values match the ``behavior`` choices documented on ``NormalizedString.split``.
    """

    REMOVED = "removed"
    ISOLATED = "isolated"
    MERGED_WITH_PREVIOUS = "merged_with_previous"
    MERGED_WITH_NEXT = "merged_with_next"
    CONTIGUOUS = "contiguous"

from .implementations import (
    BertWordPieceTokenizer,
    ByteLevelBPETokenizer,
    CharBPETokenizer,
    SentencePieceBPETokenizer,
    SentencePieceUnigramTokenizer,
)

# Module-level attribute hook (PEP 562): declares that looking up any other
# name on this module yields a value typed as Any. The body is a stub placeholder.
def __getattr__(name: str) -> Any: ...

# Bare annotations typing the implementation classes as Any.
# NOTE(review): the same five names are imported from `.implementations` just
# above, so these declarations presumably override the imported types for type
# checkers. Confirm that this is intentional.
BertWordPieceTokenizer: Any
ByteLevelBPETokenizer: Any
CharBPETokenizer: Any
SentencePieceBPETokenizer: Any
SentencePieceUnigramTokenizer: Any