tokenizer-parity-v1 / bert /token_ids.json
dollspace's picture
feat: pin ferrotorch-tokenize parity fixtures v1 (#1168)
f41659a verified
{
"encode_with_special": [
[
101,
7592,
1010,
2088,
999,
102
],
[
101,
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012,
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012,
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012,
102
],
[
101,
1864,
1876,
1950,
1671,
30239,
30233,
30240,
100,
7861,
29147,
2072,
102
],
[
101,
27427,
14088,
3793,
102
],
[
101,
13366,
29379,
1006,
1060,
1007,
1024,
2709,
1060,
1009,
1015,
102
],
[
101,
1026,
1064,
4088,
1035,
1997,
1035,
3793,
1064,
1028,
7592,
1026,
1064,
2203,
1035,
1997,
1035,
3793,
1064,
1028,
102
],
[
101,
101,
6251,
1037,
102,
6251,
1038,
102,
102
],
[
101,
102
],
[
101,
1037,
102
],
[
101,
2877,
1998,
12542,
102
],
[
101,
3816,
13138,
2007,
3616,
3429,
2575,
2581,
1998,
9255,
999,
1030,
1001,
1002,
1003,
1034,
1004,
1008,
1006,
1007,
102
],
[
101,
2047,
4179,
2093,
102
],
[
101,
21628,
21628,
21628,
102
],
[
101,
14686,
1000,
3313,
1000,
1998,
1005,
2309,
1005,
1998,
1036,
2067,
26348,
1036,
102
],
[
101,
24471,
2140,
1024,
16770,
1024,
1013,
1013,
2742,
1012,
4012,
1013,
4130,
1029,
23032,
1027,
3643,
1004,
2060,
1027,
1015,
102
],
[
101,
10373,
1024,
5650,
1030,
2742,
1012,
4012,
1010,
3960,
1030,
29379,
1012,
22834,
102
],
[
101,
1746,
1861,
100,
100,
2007,
2394,
3816,
102
],
[
101,
15192,
13360,
11057,
11057,
11057,
11057,
2050,
1998,
22861,
10322,
10322,
10322,
10322,
10322,
102
],
[
101,
7861,
29147,
2072,
4542,
100,
1998,
3340,
100,
102
],
[
101,
3642,
1024,
1036,
20014,
2364,
1006,
1007,
1063,
2709,
1014,
1025,
1065,
1036,
102
]
],
"encode_no_special": [
[
7592,
1010,
2088,
999
],
[
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012,
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012,
1996,
4248,
2829,
4419,
14523,
2058,
1996,
13971,
3899,
1012
],
[
1864,
1876,
1950,
1671,
30239,
30233,
30240,
100,
7861,
29147,
2072
],
[
27427,
14088,
3793
],
[
13366,
29379,
1006,
1060,
1007,
1024,
2709,
1060,
1009,
1015
],
[
1026,
1064,
4088,
1035,
1997,
1035,
3793,
1064,
1028,
7592,
1026,
1064,
2203,
1035,
1997,
1035,
3793,
1064,
1028
],
[
101,
6251,
1037,
102,
6251,
1038,
102
],
[],
[
1037
],
[
2877,
1998,
12542
],
[
3816,
13138,
2007,
3616,
3429,
2575,
2581,
1998,
9255,
999,
1030,
1001,
1002,
1003,
1034,
1004,
1008,
1006,
1007
],
[
2047,
4179,
2093
],
[
21628,
21628,
21628
],
[
14686,
1000,
3313,
1000,
1998,
1005,
2309,
1005,
1998,
1036,
2067,
26348,
1036
],
[
24471,
2140,
1024,
16770,
1024,
1013,
1013,
2742,
1012,
4012,
1013,
4130,
1029,
23032,
1027,
3643,
1004,
2060,
1027,
1015
],
[
10373,
1024,
5650,
1030,
2742,
1012,
4012,
1010,
3960,
1030,
29379,
1012,
22834
],
[
1746,
1861,
100,
100,
2007,
2394,
3816
],
[
15192,
13360,
11057,
11057,
11057,
11057,
2050,
1998,
22861,
10322,
10322,
10322,
10322,
10322
],
[
7861,
29147,
2072,
4542,
100,
1998,
3340,
100
],
[
3642,
1024,
1036,
20014,
2364,
1006,
1007,
1063,
2709,
1014,
1025,
1065,
1036
]
]
}