TomasFAV commited on
Commit
0a1d980
·
verified ·
1 Parent(s): 7abb510

Upload processor

Browse files
Files changed (4) hide show
  1. added_tokens.json +24 -88
  2. special_tokens_map.json +0 -266
  3. tokenizer.json +25 -601
  4. tokenizer_config.json +24 -574
added_tokens.json CHANGED
@@ -1,90 +1,26 @@
1
  {
2
- "</s_BIC>": 50344,
3
- "</s_IBAN>": 50380,
4
- "</s_bank_account_number>": 50353,
5
- "</s_const_symbol>": 50369,
6
- "</s_currency>": 50360,
7
- "</s_customer_name>": 50346,
8
- "</s_customer_register_id>": 50382,
9
- "</s_customer_tax_id>": 50359,
10
- "</s_due_date>": 50378,
11
- "</s_invoice_number>": 50365,
12
- "</s_issue_date>": 50356,
13
- "</s_payment>": 50377,
14
- "</s_supplier_name>": 50366,
15
- "</s_supplier_register_id>": 50345,
16
- "</s_supplier_tax_id>": 50364,
17
- "</s_taxable_supply_date>": 50373,
18
- "</s_total_price>": 50349,
19
- "</s_total_vat>": 50370,
20
- "</s_variable_symbol>": 50371,
21
- "<parsing>": 50361,
22
- "<reserved_10>": 50392,
23
- "<reserved_11>": 50393,
24
- "<reserved_12>": 50394,
25
- "<reserved_13>": 50395,
26
- "<reserved_14>": 50396,
27
- "<reserved_15>": 50397,
28
- "<reserved_16>": 50398,
29
- "<reserved_17>": 50399,
30
- "<reserved_18>": 50400,
31
- "<reserved_19>": 50401,
32
- "<reserved_1>": 50383,
33
- "<reserved_20>": 50402,
34
- "<reserved_21>": 50403,
35
- "<reserved_22>": 50404,
36
- "<reserved_23>": 50405,
37
- "<reserved_24>": 50406,
38
- "<reserved_25>": 50407,
39
- "<reserved_26>": 50408,
40
- "<reserved_27>": 50409,
41
- "<reserved_28>": 50410,
42
- "<reserved_29>": 50411,
43
- "<reserved_2>": 50384,
44
- "<reserved_30>": 50412,
45
- "<reserved_31>": 50413,
46
- "<reserved_32>": 50414,
47
- "<reserved_33>": 50415,
48
- "<reserved_34>": 50416,
49
- "<reserved_35>": 50417,
50
- "<reserved_36>": 50418,
51
- "<reserved_37>": 50419,
52
- "<reserved_38>": 50420,
53
- "<reserved_39>": 50421,
54
- "<reserved_3>": 50385,
55
- "<reserved_40>": 50422,
56
- "<reserved_41>": 50423,
57
- "<reserved_42>": 50424,
58
- "<reserved_43>": 50425,
59
- "<reserved_44>": 50426,
60
- "<reserved_45>": 50427,
61
- "<reserved_46>": 50428,
62
- "<reserved_47>": 50429,
63
- "<reserved_48>": 50430,
64
- "<reserved_49>": 50431,
65
- "<reserved_4>": 50386,
66
- "<reserved_5>": 50387,
67
- "<reserved_6>": 50388,
68
- "<reserved_7>": 50389,
69
- "<reserved_8>": 50390,
70
- "<reserved_9>": 50391,
71
- "<s_BIC>": 50372,
72
- "<s_IBAN>": 50363,
73
- "<s_bank_account_number>": 50367,
74
- "<s_const_symbol>": 50368,
75
- "<s_currency>": 50362,
76
- "<s_customer_name>": 50352,
77
- "<s_customer_register_id>": 50375,
78
- "<s_customer_tax_id>": 50355,
79
- "<s_due_date>": 50381,
80
- "<s_invoice_number>": 50354,
81
- "<s_issue_date>": 50358,
82
- "<s_payment>": 50379,
83
- "<s_supplier_name>": 50357,
84
- "<s_supplier_register_id>": 50351,
85
- "<s_supplier_tax_id>": 50348,
86
- "<s_taxable_supply_date>": 50374,
87
- "<s_total_price>": 50347,
88
- "<s_total_vat>": 50376,
89
- "<s_variable_symbol>": 50350
90
  }
 
1
  {
2
+ "<parsing>": 50344,
3
+ "<reserved_10>": 50354,
4
+ "<reserved_11>": 50355,
5
+ "<reserved_12>": 50356,
6
+ "<reserved_13>": 50357,
7
+ "<reserved_14>": 50358,
8
+ "<reserved_15>": 50359,
9
+ "<reserved_16>": 50360,
10
+ "<reserved_17>": 50361,
11
+ "<reserved_18>": 50362,
12
+ "<reserved_19>": 50363,
13
+ "<reserved_1>": 50345,
14
+ "<reserved_20>": 50364,
15
+ "<reserved_21>": 50365,
16
+ "<reserved_22>": 50366,
17
+ "<reserved_23>": 50367,
18
+ "<reserved_2>": 50346,
19
+ "<reserved_3>": 50347,
20
+ "<reserved_4>": 50348,
21
+ "<reserved_5>": 50349,
22
+ "<reserved_6>": 50350,
23
+ "<reserved_7>": 50351,
24
+ "<reserved_8>": 50352,
25
+ "<reserved_9>": 50353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
special_tokens_map.json CHANGED
@@ -1,277 +1,11 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "</s_BIC>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</s_supplier_register_id>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</s_customer_name>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "<s_total_price>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<s_supplier_tax_id>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</s_total_price>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<s_variable_symbol>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "<s_supplier_register_id>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<s_customer_name>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "</s_bank_account_number>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<s_invoice_number>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<s_customer_tax_id>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- },
87
- {
88
- "content": "</s_issue_date>",
89
- "lstrip": false,
90
- "normalized": false,
91
- "rstrip": false,
92
- "single_word": false
93
- },
94
- {
95
- "content": "<s_supplier_name>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false
100
- },
101
- {
102
- "content": "<s_issue_date>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false
107
- },
108
- {
109
- "content": "</s_customer_tax_id>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false
114
- },
115
- {
116
- "content": "</s_currency>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false
121
- },
122
  {
123
  "content": "<parsing>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
127
  "single_word": false
128
- },
129
- {
130
- "content": "<s_currency>",
131
- "lstrip": false,
132
- "normalized": false,
133
- "rstrip": false,
134
- "single_word": false
135
- },
136
- {
137
- "content": "<s_IBAN>",
138
- "lstrip": false,
139
- "normalized": false,
140
- "rstrip": false,
141
- "single_word": false
142
- },
143
- {
144
- "content": "</s_supplier_tax_id>",
145
- "lstrip": false,
146
- "normalized": false,
147
- "rstrip": false,
148
- "single_word": false
149
- },
150
- {
151
- "content": "</s_invoice_number>",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false
156
- },
157
- {
158
- "content": "</s_supplier_name>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false
163
- },
164
- {
165
- "content": "<s_bank_account_number>",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false
170
- },
171
- {
172
- "content": "<s_const_symbol>",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false
177
- },
178
- {
179
- "content": "</s_const_symbol>",
180
- "lstrip": false,
181
- "normalized": false,
182
- "rstrip": false,
183
- "single_word": false
184
- },
185
- {
186
- "content": "</s_total_vat>",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false
191
- },
192
- {
193
- "content": "</s_variable_symbol>",
194
- "lstrip": false,
195
- "normalized": false,
196
- "rstrip": false,
197
- "single_word": false
198
- },
199
- {
200
- "content": "<s_BIC>",
201
- "lstrip": false,
202
- "normalized": false,
203
- "rstrip": false,
204
- "single_word": false
205
- },
206
- {
207
- "content": "</s_taxable_supply_date>",
208
- "lstrip": false,
209
- "normalized": false,
210
- "rstrip": false,
211
- "single_word": false
212
- },
213
- {
214
- "content": "<s_taxable_supply_date>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false
219
- },
220
- {
221
- "content": "<s_customer_register_id>",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false
226
- },
227
- {
228
- "content": "<s_total_vat>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false
233
- },
234
- {
235
- "content": "</s_payment>",
236
- "lstrip": false,
237
- "normalized": false,
238
- "rstrip": false,
239
- "single_word": false
240
- },
241
- {
242
- "content": "</s_due_date>",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false
247
- },
248
- {
249
- "content": "<s_payment>",
250
- "lstrip": false,
251
- "normalized": false,
252
- "rstrip": false,
253
- "single_word": false
254
- },
255
- {
256
- "content": "</s_IBAN>",
257
- "lstrip": false,
258
- "normalized": false,
259
- "rstrip": false,
260
- "single_word": false
261
- },
262
- {
263
- "content": "<s_due_date>",
264
- "lstrip": false,
265
- "normalized": false,
266
- "rstrip": false,
267
- "single_word": false
268
- },
269
- {
270
- "content": "</s_customer_register_id>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false
275
  }
276
  ],
277
  "eos_token": {
 
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "content": "<parsing>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }
10
  ],
11
  "eos_token": {
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 128
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
@@ -946,159 +946,6 @@
946
  },
947
  {
948
  "id": 50344,
949
- "content": "</s_BIC>",
950
- "single_word": false,
951
- "lstrip": false,
952
- "rstrip": false,
953
- "normalized": false,
954
- "special": true
955
- },
956
- {
957
- "id": 50345,
958
- "content": "</s_supplier_register_id>",
959
- "single_word": false,
960
- "lstrip": false,
961
- "rstrip": false,
962
- "normalized": false,
963
- "special": true
964
- },
965
- {
966
- "id": 50346,
967
- "content": "</s_customer_name>",
968
- "single_word": false,
969
- "lstrip": false,
970
- "rstrip": false,
971
- "normalized": false,
972
- "special": true
973
- },
974
- {
975
- "id": 50347,
976
- "content": "<s_total_price>",
977
- "single_word": false,
978
- "lstrip": false,
979
- "rstrip": false,
980
- "normalized": false,
981
- "special": true
982
- },
983
- {
984
- "id": 50348,
985
- "content": "<s_supplier_tax_id>",
986
- "single_word": false,
987
- "lstrip": false,
988
- "rstrip": false,
989
- "normalized": false,
990
- "special": true
991
- },
992
- {
993
- "id": 50349,
994
- "content": "</s_total_price>",
995
- "single_word": false,
996
- "lstrip": false,
997
- "rstrip": false,
998
- "normalized": false,
999
- "special": true
1000
- },
1001
- {
1002
- "id": 50350,
1003
- "content": "<s_variable_symbol>",
1004
- "single_word": false,
1005
- "lstrip": false,
1006
- "rstrip": false,
1007
- "normalized": false,
1008
- "special": true
1009
- },
1010
- {
1011
- "id": 50351,
1012
- "content": "<s_supplier_register_id>",
1013
- "single_word": false,
1014
- "lstrip": false,
1015
- "rstrip": false,
1016
- "normalized": false,
1017
- "special": true
1018
- },
1019
- {
1020
- "id": 50352,
1021
- "content": "<s_customer_name>",
1022
- "single_word": false,
1023
- "lstrip": false,
1024
- "rstrip": false,
1025
- "normalized": false,
1026
- "special": true
1027
- },
1028
- {
1029
- "id": 50353,
1030
- "content": "</s_bank_account_number>",
1031
- "single_word": false,
1032
- "lstrip": false,
1033
- "rstrip": false,
1034
- "normalized": false,
1035
- "special": true
1036
- },
1037
- {
1038
- "id": 50354,
1039
- "content": "<s_invoice_number>",
1040
- "single_word": false,
1041
- "lstrip": false,
1042
- "rstrip": false,
1043
- "normalized": false,
1044
- "special": true
1045
- },
1046
- {
1047
- "id": 50355,
1048
- "content": "<s_customer_tax_id>",
1049
- "single_word": false,
1050
- "lstrip": false,
1051
- "rstrip": false,
1052
- "normalized": false,
1053
- "special": true
1054
- },
1055
- {
1056
- "id": 50356,
1057
- "content": "</s_issue_date>",
1058
- "single_word": false,
1059
- "lstrip": false,
1060
- "rstrip": false,
1061
- "normalized": false,
1062
- "special": true
1063
- },
1064
- {
1065
- "id": 50357,
1066
- "content": "<s_supplier_name>",
1067
- "single_word": false,
1068
- "lstrip": false,
1069
- "rstrip": false,
1070
- "normalized": false,
1071
- "special": true
1072
- },
1073
- {
1074
- "id": 50358,
1075
- "content": "<s_issue_date>",
1076
- "single_word": false,
1077
- "lstrip": false,
1078
- "rstrip": false,
1079
- "normalized": false,
1080
- "special": true
1081
- },
1082
- {
1083
- "id": 50359,
1084
- "content": "</s_customer_tax_id>",
1085
- "single_word": false,
1086
- "lstrip": false,
1087
- "rstrip": false,
1088
- "normalized": false,
1089
- "special": true
1090
- },
1091
- {
1092
- "id": 50360,
1093
- "content": "</s_currency>",
1094
- "single_word": false,
1095
- "lstrip": false,
1096
- "rstrip": false,
1097
- "normalized": false,
1098
- "special": true
1099
- },
1100
- {
1101
- "id": 50361,
1102
  "content": "<parsing>",
1103
  "single_word": false,
1104
  "lstrip": false,
@@ -1107,196 +954,7 @@
1107
  "special": true
1108
  },
1109
  {
1110
- "id": 50362,
1111
- "content": "<s_currency>",
1112
- "single_word": false,
1113
- "lstrip": false,
1114
- "rstrip": false,
1115
- "normalized": false,
1116
- "special": true
1117
- },
1118
- {
1119
- "id": 50363,
1120
- "content": "<s_IBAN>",
1121
- "single_word": false,
1122
- "lstrip": false,
1123
- "rstrip": false,
1124
- "normalized": false,
1125
- "special": true
1126
- },
1127
- {
1128
- "id": 50364,
1129
- "content": "</s_supplier_tax_id>",
1130
- "single_word": false,
1131
- "lstrip": false,
1132
- "rstrip": false,
1133
- "normalized": false,
1134
- "special": true
1135
- },
1136
- {
1137
- "id": 50365,
1138
- "content": "</s_invoice_number>",
1139
- "single_word": false,
1140
- "lstrip": false,
1141
- "rstrip": false,
1142
- "normalized": false,
1143
- "special": true
1144
- },
1145
- {
1146
- "id": 50366,
1147
- "content": "</s_supplier_name>",
1148
- "single_word": false,
1149
- "lstrip": false,
1150
- "rstrip": false,
1151
- "normalized": false,
1152
- "special": true
1153
- },
1154
- {
1155
- "id": 50367,
1156
- "content": "<s_bank_account_number>",
1157
- "single_word": false,
1158
- "lstrip": false,
1159
- "rstrip": false,
1160
- "normalized": false,
1161
- "special": true
1162
- },
1163
- {
1164
- "id": 50368,
1165
- "content": "<s_const_symbol>",
1166
- "single_word": false,
1167
- "lstrip": false,
1168
- "rstrip": false,
1169
- "normalized": false,
1170
- "special": true
1171
- },
1172
- {
1173
- "id": 50369,
1174
- "content": "</s_const_symbol>",
1175
- "single_word": false,
1176
- "lstrip": false,
1177
- "rstrip": false,
1178
- "normalized": false,
1179
- "special": true
1180
- },
1181
- {
1182
- "id": 50370,
1183
- "content": "</s_total_vat>",
1184
- "single_word": false,
1185
- "lstrip": false,
1186
- "rstrip": false,
1187
- "normalized": false,
1188
- "special": true
1189
- },
1190
- {
1191
- "id": 50371,
1192
- "content": "</s_variable_symbol>",
1193
- "single_word": false,
1194
- "lstrip": false,
1195
- "rstrip": false,
1196
- "normalized": false,
1197
- "special": true
1198
- },
1199
- {
1200
- "id": 50372,
1201
- "content": "<s_BIC>",
1202
- "single_word": false,
1203
- "lstrip": false,
1204
- "rstrip": false,
1205
- "normalized": false,
1206
- "special": true
1207
- },
1208
- {
1209
- "id": 50373,
1210
- "content": "</s_taxable_supply_date>",
1211
- "single_word": false,
1212
- "lstrip": false,
1213
- "rstrip": false,
1214
- "normalized": false,
1215
- "special": true
1216
- },
1217
- {
1218
- "id": 50374,
1219
- "content": "<s_taxable_supply_date>",
1220
- "single_word": false,
1221
- "lstrip": false,
1222
- "rstrip": false,
1223
- "normalized": false,
1224
- "special": true
1225
- },
1226
- {
1227
- "id": 50375,
1228
- "content": "<s_customer_register_id>",
1229
- "single_word": false,
1230
- "lstrip": false,
1231
- "rstrip": false,
1232
- "normalized": false,
1233
- "special": true
1234
- },
1235
- {
1236
- "id": 50376,
1237
- "content": "<s_total_vat>",
1238
- "single_word": false,
1239
- "lstrip": false,
1240
- "rstrip": false,
1241
- "normalized": false,
1242
- "special": true
1243
- },
1244
- {
1245
- "id": 50377,
1246
- "content": "</s_payment>",
1247
- "single_word": false,
1248
- "lstrip": false,
1249
- "rstrip": false,
1250
- "normalized": false,
1251
- "special": true
1252
- },
1253
- {
1254
- "id": 50378,
1255
- "content": "</s_due_date>",
1256
- "single_word": false,
1257
- "lstrip": false,
1258
- "rstrip": false,
1259
- "normalized": false,
1260
- "special": true
1261
- },
1262
- {
1263
- "id": 50379,
1264
- "content": "<s_payment>",
1265
- "single_word": false,
1266
- "lstrip": false,
1267
- "rstrip": false,
1268
- "normalized": false,
1269
- "special": true
1270
- },
1271
- {
1272
- "id": 50380,
1273
- "content": "</s_IBAN>",
1274
- "single_word": false,
1275
- "lstrip": false,
1276
- "rstrip": false,
1277
- "normalized": false,
1278
- "special": true
1279
- },
1280
- {
1281
- "id": 50381,
1282
- "content": "<s_due_date>",
1283
- "single_word": false,
1284
- "lstrip": false,
1285
- "rstrip": false,
1286
- "normalized": false,
1287
- "special": true
1288
- },
1289
- {
1290
- "id": 50382,
1291
- "content": "</s_customer_register_id>",
1292
- "single_word": false,
1293
- "lstrip": false,
1294
- "rstrip": false,
1295
- "normalized": false,
1296
- "special": true
1297
- },
1298
- {
1299
- "id": 50383,
1300
  "content": "<reserved_1>",
1301
  "single_word": false,
1302
  "lstrip": false,
@@ -1305,7 +963,7 @@
1305
  "special": false
1306
  },
1307
  {
1308
- "id": 50384,
1309
  "content": "<reserved_2>",
1310
  "single_word": false,
1311
  "lstrip": false,
@@ -1314,7 +972,7 @@
1314
  "special": false
1315
  },
1316
  {
1317
- "id": 50385,
1318
  "content": "<reserved_3>",
1319
  "single_word": false,
1320
  "lstrip": false,
@@ -1323,7 +981,7 @@
1323
  "special": false
1324
  },
1325
  {
1326
- "id": 50386,
1327
  "content": "<reserved_4>",
1328
  "single_word": false,
1329
  "lstrip": false,
@@ -1332,7 +990,7 @@
1332
  "special": false
1333
  },
1334
  {
1335
- "id": 50387,
1336
  "content": "<reserved_5>",
1337
  "single_word": false,
1338
  "lstrip": false,
@@ -1341,7 +999,7 @@
1341
  "special": false
1342
  },
1343
  {
1344
- "id": 50388,
1345
  "content": "<reserved_6>",
1346
  "single_word": false,
1347
  "lstrip": false,
@@ -1350,7 +1008,7 @@
1350
  "special": false
1351
  },
1352
  {
1353
- "id": 50389,
1354
  "content": "<reserved_7>",
1355
  "single_word": false,
1356
  "lstrip": false,
@@ -1359,7 +1017,7 @@
1359
  "special": false
1360
  },
1361
  {
1362
- "id": 50390,
1363
  "content": "<reserved_8>",
1364
  "single_word": false,
1365
  "lstrip": false,
@@ -1368,7 +1026,7 @@
1368
  "special": false
1369
  },
1370
  {
1371
- "id": 50391,
1372
  "content": "<reserved_9>",
1373
  "single_word": false,
1374
  "lstrip": false,
@@ -1377,7 +1035,7 @@
1377
  "special": false
1378
  },
1379
  {
1380
- "id": 50392,
1381
  "content": "<reserved_10>",
1382
  "single_word": false,
1383
  "lstrip": false,
@@ -1386,7 +1044,7 @@
1386
  "special": false
1387
  },
1388
  {
1389
- "id": 50393,
1390
  "content": "<reserved_11>",
1391
  "single_word": false,
1392
  "lstrip": false,
@@ -1395,7 +1053,7 @@
1395
  "special": false
1396
  },
1397
  {
1398
- "id": 50394,
1399
  "content": "<reserved_12>",
1400
  "single_word": false,
1401
  "lstrip": false,
@@ -1404,7 +1062,7 @@
1404
  "special": false
1405
  },
1406
  {
1407
- "id": 50395,
1408
  "content": "<reserved_13>",
1409
  "single_word": false,
1410
  "lstrip": false,
@@ -1413,7 +1071,7 @@
1413
  "special": false
1414
  },
1415
  {
1416
- "id": 50396,
1417
  "content": "<reserved_14>",
1418
  "single_word": false,
1419
  "lstrip": false,
@@ -1422,7 +1080,7 @@
1422
  "special": false
1423
  },
1424
  {
1425
- "id": 50397,
1426
  "content": "<reserved_15>",
1427
  "single_word": false,
1428
  "lstrip": false,
@@ -1431,7 +1089,7 @@
1431
  "special": false
1432
  },
1433
  {
1434
- "id": 50398,
1435
  "content": "<reserved_16>",
1436
  "single_word": false,
1437
  "lstrip": false,
@@ -1440,7 +1098,7 @@
1440
  "special": false
1441
  },
1442
  {
1443
- "id": 50399,
1444
  "content": "<reserved_17>",
1445
  "single_word": false,
1446
  "lstrip": false,
@@ -1449,7 +1107,7 @@
1449
  "special": false
1450
  },
1451
  {
1452
- "id": 50400,
1453
  "content": "<reserved_18>",
1454
  "single_word": false,
1455
  "lstrip": false,
@@ -1458,7 +1116,7 @@
1458
  "special": false
1459
  },
1460
  {
1461
- "id": 50401,
1462
  "content": "<reserved_19>",
1463
  "single_word": false,
1464
  "lstrip": false,
@@ -1467,7 +1125,7 @@
1467
  "special": false
1468
  },
1469
  {
1470
- "id": 50402,
1471
  "content": "<reserved_20>",
1472
  "single_word": false,
1473
  "lstrip": false,
@@ -1476,7 +1134,7 @@
1476
  "special": false
1477
  },
1478
  {
1479
- "id": 50403,
1480
  "content": "<reserved_21>",
1481
  "single_word": false,
1482
  "lstrip": false,
@@ -1485,7 +1143,7 @@
1485
  "special": false
1486
  },
1487
  {
1488
- "id": 50404,
1489
  "content": "<reserved_22>",
1490
  "single_word": false,
1491
  "lstrip": false,
@@ -1494,247 +1152,13 @@
1494
  "special": false
1495
  },
1496
  {
1497
- "id": 50405,
1498
  "content": "<reserved_23>",
1499
  "single_word": false,
1500
  "lstrip": false,
1501
  "rstrip": false,
1502
  "normalized": true,
1503
  "special": false
1504
- },
1505
- {
1506
- "id": 50406,
1507
- "content": "<reserved_24>",
1508
- "single_word": false,
1509
- "lstrip": false,
1510
- "rstrip": false,
1511
- "normalized": true,
1512
- "special": false
1513
- },
1514
- {
1515
- "id": 50407,
1516
- "content": "<reserved_25>",
1517
- "single_word": false,
1518
- "lstrip": false,
1519
- "rstrip": false,
1520
- "normalized": true,
1521
- "special": false
1522
- },
1523
- {
1524
- "id": 50408,
1525
- "content": "<reserved_26>",
1526
- "single_word": false,
1527
- "lstrip": false,
1528
- "rstrip": false,
1529
- "normalized": true,
1530
- "special": false
1531
- },
1532
- {
1533
- "id": 50409,
1534
- "content": "<reserved_27>",
1535
- "single_word": false,
1536
- "lstrip": false,
1537
- "rstrip": false,
1538
- "normalized": true,
1539
- "special": false
1540
- },
1541
- {
1542
- "id": 50410,
1543
- "content": "<reserved_28>",
1544
- "single_word": false,
1545
- "lstrip": false,
1546
- "rstrip": false,
1547
- "normalized": true,
1548
- "special": false
1549
- },
1550
- {
1551
- "id": 50411,
1552
- "content": "<reserved_29>",
1553
- "single_word": false,
1554
- "lstrip": false,
1555
- "rstrip": false,
1556
- "normalized": true,
1557
- "special": false
1558
- },
1559
- {
1560
- "id": 50412,
1561
- "content": "<reserved_30>",
1562
- "single_word": false,
1563
- "lstrip": false,
1564
- "rstrip": false,
1565
- "normalized": true,
1566
- "special": false
1567
- },
1568
- {
1569
- "id": 50413,
1570
- "content": "<reserved_31>",
1571
- "single_word": false,
1572
- "lstrip": false,
1573
- "rstrip": false,
1574
- "normalized": true,
1575
- "special": false
1576
- },
1577
- {
1578
- "id": 50414,
1579
- "content": "<reserved_32>",
1580
- "single_word": false,
1581
- "lstrip": false,
1582
- "rstrip": false,
1583
- "normalized": true,
1584
- "special": false
1585
- },
1586
- {
1587
- "id": 50415,
1588
- "content": "<reserved_33>",
1589
- "single_word": false,
1590
- "lstrip": false,
1591
- "rstrip": false,
1592
- "normalized": true,
1593
- "special": false
1594
- },
1595
- {
1596
- "id": 50416,
1597
- "content": "<reserved_34>",
1598
- "single_word": false,
1599
- "lstrip": false,
1600
- "rstrip": false,
1601
- "normalized": true,
1602
- "special": false
1603
- },
1604
- {
1605
- "id": 50417,
1606
- "content": "<reserved_35>",
1607
- "single_word": false,
1608
- "lstrip": false,
1609
- "rstrip": false,
1610
- "normalized": true,
1611
- "special": false
1612
- },
1613
- {
1614
- "id": 50418,
1615
- "content": "<reserved_36>",
1616
- "single_word": false,
1617
- "lstrip": false,
1618
- "rstrip": false,
1619
- "normalized": true,
1620
- "special": false
1621
- },
1622
- {
1623
- "id": 50419,
1624
- "content": "<reserved_37>",
1625
- "single_word": false,
1626
- "lstrip": false,
1627
- "rstrip": false,
1628
- "normalized": true,
1629
- "special": false
1630
- },
1631
- {
1632
- "id": 50420,
1633
- "content": "<reserved_38>",
1634
- "single_word": false,
1635
- "lstrip": false,
1636
- "rstrip": false,
1637
- "normalized": true,
1638
- "special": false
1639
- },
1640
- {
1641
- "id": 50421,
1642
- "content": "<reserved_39>",
1643
- "single_word": false,
1644
- "lstrip": false,
1645
- "rstrip": false,
1646
- "normalized": true,
1647
- "special": false
1648
- },
1649
- {
1650
- "id": 50422,
1651
- "content": "<reserved_40>",
1652
- "single_word": false,
1653
- "lstrip": false,
1654
- "rstrip": false,
1655
- "normalized": true,
1656
- "special": false
1657
- },
1658
- {
1659
- "id": 50423,
1660
- "content": "<reserved_41>",
1661
- "single_word": false,
1662
- "lstrip": false,
1663
- "rstrip": false,
1664
- "normalized": true,
1665
- "special": false
1666
- },
1667
- {
1668
- "id": 50424,
1669
- "content": "<reserved_42>",
1670
- "single_word": false,
1671
- "lstrip": false,
1672
- "rstrip": false,
1673
- "normalized": true,
1674
- "special": false
1675
- },
1676
- {
1677
- "id": 50425,
1678
- "content": "<reserved_43>",
1679
- "single_word": false,
1680
- "lstrip": false,
1681
- "rstrip": false,
1682
- "normalized": true,
1683
- "special": false
1684
- },
1685
- {
1686
- "id": 50426,
1687
- "content": "<reserved_44>",
1688
- "single_word": false,
1689
- "lstrip": false,
1690
- "rstrip": false,
1691
- "normalized": true,
1692
- "special": false
1693
- },
1694
- {
1695
- "id": 50427,
1696
- "content": "<reserved_45>",
1697
- "single_word": false,
1698
- "lstrip": false,
1699
- "rstrip": false,
1700
- "normalized": true,
1701
- "special": false
1702
- },
1703
- {
1704
- "id": 50428,
1705
- "content": "<reserved_46>",
1706
- "single_word": false,
1707
- "lstrip": false,
1708
- "rstrip": false,
1709
- "normalized": true,
1710
- "special": false
1711
- },
1712
- {
1713
- "id": 50429,
1714
- "content": "<reserved_47>",
1715
- "single_word": false,
1716
- "lstrip": false,
1717
- "rstrip": false,
1718
- "normalized": true,
1719
- "special": false
1720
- },
1721
- {
1722
- "id": 50430,
1723
- "content": "<reserved_48>",
1724
- "single_word": false,
1725
- "lstrip": false,
1726
- "rstrip": false,
1727
- "normalized": true,
1728
- "special": false
1729
- },
1730
- {
1731
- "id": 50431,
1732
- "content": "<reserved_49>",
1733
- "single_word": false,
1734
- "lstrip": false,
1735
- "rstrip": false,
1736
- "normalized": true,
1737
- "special": false
1738
  }
1739
  ],
1740
  "normalizer": {
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 512
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
946
  },
947
  {
948
  "id": 50344,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
949
  "content": "<parsing>",
950
  "single_word": false,
951
  "lstrip": false,
 
954
  "special": true
955
  },
956
  {
957
+ "id": 50345,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
958
  "content": "<reserved_1>",
959
  "single_word": false,
960
  "lstrip": false,
 
963
  "special": false
964
  },
965
  {
966
+ "id": 50346,
967
  "content": "<reserved_2>",
968
  "single_word": false,
969
  "lstrip": false,
 
972
  "special": false
973
  },
974
  {
975
+ "id": 50347,
976
  "content": "<reserved_3>",
977
  "single_word": false,
978
  "lstrip": false,
 
981
  "special": false
982
  },
983
  {
984
+ "id": 50348,
985
  "content": "<reserved_4>",
986
  "single_word": false,
987
  "lstrip": false,
 
990
  "special": false
991
  },
992
  {
993
+ "id": 50349,
994
  "content": "<reserved_5>",
995
  "single_word": false,
996
  "lstrip": false,
 
999
  "special": false
1000
  },
1001
  {
1002
+ "id": 50350,
1003
  "content": "<reserved_6>",
1004
  "single_word": false,
1005
  "lstrip": false,
 
1008
  "special": false
1009
  },
1010
  {
1011
+ "id": 50351,
1012
  "content": "<reserved_7>",
1013
  "single_word": false,
1014
  "lstrip": false,
 
1017
  "special": false
1018
  },
1019
  {
1020
+ "id": 50352,
1021
  "content": "<reserved_8>",
1022
  "single_word": false,
1023
  "lstrip": false,
 
1026
  "special": false
1027
  },
1028
  {
1029
+ "id": 50353,
1030
  "content": "<reserved_9>",
1031
  "single_word": false,
1032
  "lstrip": false,
 
1035
  "special": false
1036
  },
1037
  {
1038
+ "id": 50354,
1039
  "content": "<reserved_10>",
1040
  "single_word": false,
1041
  "lstrip": false,
 
1044
  "special": false
1045
  },
1046
  {
1047
+ "id": 50355,
1048
  "content": "<reserved_11>",
1049
  "single_word": false,
1050
  "lstrip": false,
 
1053
  "special": false
1054
  },
1055
  {
1056
+ "id": 50356,
1057
  "content": "<reserved_12>",
1058
  "single_word": false,
1059
  "lstrip": false,
 
1062
  "special": false
1063
  },
1064
  {
1065
+ "id": 50357,
1066
  "content": "<reserved_13>",
1067
  "single_word": false,
1068
  "lstrip": false,
 
1071
  "special": false
1072
  },
1073
  {
1074
+ "id": 50358,
1075
  "content": "<reserved_14>",
1076
  "single_word": false,
1077
  "lstrip": false,
 
1080
  "special": false
1081
  },
1082
  {
1083
+ "id": 50359,
1084
  "content": "<reserved_15>",
1085
  "single_word": false,
1086
  "lstrip": false,
 
1089
  "special": false
1090
  },
1091
  {
1092
+ "id": 50360,
1093
  "content": "<reserved_16>",
1094
  "single_word": false,
1095
  "lstrip": false,
 
1098
  "special": false
1099
  },
1100
  {
1101
+ "id": 50361,
1102
  "content": "<reserved_17>",
1103
  "single_word": false,
1104
  "lstrip": false,
 
1107
  "special": false
1108
  },
1109
  {
1110
+ "id": 50362,
1111
  "content": "<reserved_18>",
1112
  "single_word": false,
1113
  "lstrip": false,
 
1116
  "special": false
1117
  },
1118
  {
1119
+ "id": 50363,
1120
  "content": "<reserved_19>",
1121
  "single_word": false,
1122
  "lstrip": false,
 
1125
  "special": false
1126
  },
1127
  {
1128
+ "id": 50364,
1129
  "content": "<reserved_20>",
1130
  "single_word": false,
1131
  "lstrip": false,
 
1134
  "special": false
1135
  },
1136
  {
1137
+ "id": 50365,
1138
  "content": "<reserved_21>",
1139
  "single_word": false,
1140
  "lstrip": false,
 
1143
  "special": false
1144
  },
1145
  {
1146
+ "id": 50366,
1147
  "content": "<reserved_22>",
1148
  "single_word": false,
1149
  "lstrip": false,
 
1152
  "special": false
1153
  },
1154
  {
1155
+ "id": 50367,
1156
  "content": "<reserved_23>",
1157
  "single_word": false,
1158
  "lstrip": false,
1159
  "rstrip": false,
1160
  "normalized": true,
1161
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1162
  }
1163
  ],
1164
  "normalizer": {
tokenizer_config.json CHANGED
@@ -826,142 +826,6 @@
826
  "special": true
827
  },
828
  "50344": {
829
- "content": "</s_BIC>",
830
- "lstrip": false,
831
- "normalized": false,
832
- "rstrip": false,
833
- "single_word": false,
834
- "special": true
835
- },
836
- "50345": {
837
- "content": "</s_supplier_register_id>",
838
- "lstrip": false,
839
- "normalized": false,
840
- "rstrip": false,
841
- "single_word": false,
842
- "special": true
843
- },
844
- "50346": {
845
- "content": "</s_customer_name>",
846
- "lstrip": false,
847
- "normalized": false,
848
- "rstrip": false,
849
- "single_word": false,
850
- "special": true
851
- },
852
- "50347": {
853
- "content": "<s_total_price>",
854
- "lstrip": false,
855
- "normalized": false,
856
- "rstrip": false,
857
- "single_word": false,
858
- "special": true
859
- },
860
- "50348": {
861
- "content": "<s_supplier_tax_id>",
862
- "lstrip": false,
863
- "normalized": false,
864
- "rstrip": false,
865
- "single_word": false,
866
- "special": true
867
- },
868
- "50349": {
869
- "content": "</s_total_price>",
870
- "lstrip": false,
871
- "normalized": false,
872
- "rstrip": false,
873
- "single_word": false,
874
- "special": true
875
- },
876
- "50350": {
877
- "content": "<s_variable_symbol>",
878
- "lstrip": false,
879
- "normalized": false,
880
- "rstrip": false,
881
- "single_word": false,
882
- "special": true
883
- },
884
- "50351": {
885
- "content": "<s_supplier_register_id>",
886
- "lstrip": false,
887
- "normalized": false,
888
- "rstrip": false,
889
- "single_word": false,
890
- "special": true
891
- },
892
- "50352": {
893
- "content": "<s_customer_name>",
894
- "lstrip": false,
895
- "normalized": false,
896
- "rstrip": false,
897
- "single_word": false,
898
- "special": true
899
- },
900
- "50353": {
901
- "content": "</s_bank_account_number>",
902
- "lstrip": false,
903
- "normalized": false,
904
- "rstrip": false,
905
- "single_word": false,
906
- "special": true
907
- },
908
- "50354": {
909
- "content": "<s_invoice_number>",
910
- "lstrip": false,
911
- "normalized": false,
912
- "rstrip": false,
913
- "single_word": false,
914
- "special": true
915
- },
916
- "50355": {
917
- "content": "<s_customer_tax_id>",
918
- "lstrip": false,
919
- "normalized": false,
920
- "rstrip": false,
921
- "single_word": false,
922
- "special": true
923
- },
924
- "50356": {
925
- "content": "</s_issue_date>",
926
- "lstrip": false,
927
- "normalized": false,
928
- "rstrip": false,
929
- "single_word": false,
930
- "special": true
931
- },
932
- "50357": {
933
- "content": "<s_supplier_name>",
934
- "lstrip": false,
935
- "normalized": false,
936
- "rstrip": false,
937
- "single_word": false,
938
- "special": true
939
- },
940
- "50358": {
941
- "content": "<s_issue_date>",
942
- "lstrip": false,
943
- "normalized": false,
944
- "rstrip": false,
945
- "single_word": false,
946
- "special": true
947
- },
948
- "50359": {
949
- "content": "</s_customer_tax_id>",
950
- "lstrip": false,
951
- "normalized": false,
952
- "rstrip": false,
953
- "single_word": false,
954
- "special": true
955
- },
956
- "50360": {
957
- "content": "</s_currency>",
958
- "lstrip": false,
959
- "normalized": false,
960
- "rstrip": false,
961
- "single_word": false,
962
- "special": true
963
- },
964
- "50361": {
965
  "content": "<parsing>",
966
  "lstrip": false,
967
  "normalized": false,
@@ -969,175 +833,7 @@
969
  "single_word": false,
970
  "special": true
971
  },
972
- "50362": {
973
- "content": "<s_currency>",
974
- "lstrip": false,
975
- "normalized": false,
976
- "rstrip": false,
977
- "single_word": false,
978
- "special": true
979
- },
980
- "50363": {
981
- "content": "<s_IBAN>",
982
- "lstrip": false,
983
- "normalized": false,
984
- "rstrip": false,
985
- "single_word": false,
986
- "special": true
987
- },
988
- "50364": {
989
- "content": "</s_supplier_tax_id>",
990
- "lstrip": false,
991
- "normalized": false,
992
- "rstrip": false,
993
- "single_word": false,
994
- "special": true
995
- },
996
- "50365": {
997
- "content": "</s_invoice_number>",
998
- "lstrip": false,
999
- "normalized": false,
1000
- "rstrip": false,
1001
- "single_word": false,
1002
- "special": true
1003
- },
1004
- "50366": {
1005
- "content": "</s_supplier_name>",
1006
- "lstrip": false,
1007
- "normalized": false,
1008
- "rstrip": false,
1009
- "single_word": false,
1010
- "special": true
1011
- },
1012
- "50367": {
1013
- "content": "<s_bank_account_number>",
1014
- "lstrip": false,
1015
- "normalized": false,
1016
- "rstrip": false,
1017
- "single_word": false,
1018
- "special": true
1019
- },
1020
- "50368": {
1021
- "content": "<s_const_symbol>",
1022
- "lstrip": false,
1023
- "normalized": false,
1024
- "rstrip": false,
1025
- "single_word": false,
1026
- "special": true
1027
- },
1028
- "50369": {
1029
- "content": "</s_const_symbol>",
1030
- "lstrip": false,
1031
- "normalized": false,
1032
- "rstrip": false,
1033
- "single_word": false,
1034
- "special": true
1035
- },
1036
- "50370": {
1037
- "content": "</s_total_vat>",
1038
- "lstrip": false,
1039
- "normalized": false,
1040
- "rstrip": false,
1041
- "single_word": false,
1042
- "special": true
1043
- },
1044
- "50371": {
1045
- "content": "</s_variable_symbol>",
1046
- "lstrip": false,
1047
- "normalized": false,
1048
- "rstrip": false,
1049
- "single_word": false,
1050
- "special": true
1051
- },
1052
- "50372": {
1053
- "content": "<s_BIC>",
1054
- "lstrip": false,
1055
- "normalized": false,
1056
- "rstrip": false,
1057
- "single_word": false,
1058
- "special": true
1059
- },
1060
- "50373": {
1061
- "content": "</s_taxable_supply_date>",
1062
- "lstrip": false,
1063
- "normalized": false,
1064
- "rstrip": false,
1065
- "single_word": false,
1066
- "special": true
1067
- },
1068
- "50374": {
1069
- "content": "<s_taxable_supply_date>",
1070
- "lstrip": false,
1071
- "normalized": false,
1072
- "rstrip": false,
1073
- "single_word": false,
1074
- "special": true
1075
- },
1076
- "50375": {
1077
- "content": "<s_customer_register_id>",
1078
- "lstrip": false,
1079
- "normalized": false,
1080
- "rstrip": false,
1081
- "single_word": false,
1082
- "special": true
1083
- },
1084
- "50376": {
1085
- "content": "<s_total_vat>",
1086
- "lstrip": false,
1087
- "normalized": false,
1088
- "rstrip": false,
1089
- "single_word": false,
1090
- "special": true
1091
- },
1092
- "50377": {
1093
- "content": "</s_payment>",
1094
- "lstrip": false,
1095
- "normalized": false,
1096
- "rstrip": false,
1097
- "single_word": false,
1098
- "special": true
1099
- },
1100
- "50378": {
1101
- "content": "</s_due_date>",
1102
- "lstrip": false,
1103
- "normalized": false,
1104
- "rstrip": false,
1105
- "single_word": false,
1106
- "special": true
1107
- },
1108
- "50379": {
1109
- "content": "<s_payment>",
1110
- "lstrip": false,
1111
- "normalized": false,
1112
- "rstrip": false,
1113
- "single_word": false,
1114
- "special": true
1115
- },
1116
- "50380": {
1117
- "content": "</s_IBAN>",
1118
- "lstrip": false,
1119
- "normalized": false,
1120
- "rstrip": false,
1121
- "single_word": false,
1122
- "special": true
1123
- },
1124
- "50381": {
1125
- "content": "<s_due_date>",
1126
- "lstrip": false,
1127
- "normalized": false,
1128
- "rstrip": false,
1129
- "single_word": false,
1130
- "special": true
1131
- },
1132
- "50382": {
1133
- "content": "</s_customer_register_id>",
1134
- "lstrip": false,
1135
- "normalized": false,
1136
- "rstrip": false,
1137
- "single_word": false,
1138
- "special": true
1139
- },
1140
- "50383": {
1141
  "content": "<reserved_1>",
1142
  "lstrip": false,
1143
  "normalized": true,
@@ -1145,7 +841,7 @@
1145
  "single_word": false,
1146
  "special": false
1147
  },
1148
- "50384": {
1149
  "content": "<reserved_2>",
1150
  "lstrip": false,
1151
  "normalized": true,
@@ -1153,7 +849,7 @@
1153
  "single_word": false,
1154
  "special": false
1155
  },
1156
- "50385": {
1157
  "content": "<reserved_3>",
1158
  "lstrip": false,
1159
  "normalized": true,
@@ -1161,7 +857,7 @@
1161
  "single_word": false,
1162
  "special": false
1163
  },
1164
- "50386": {
1165
  "content": "<reserved_4>",
1166
  "lstrip": false,
1167
  "normalized": true,
@@ -1169,7 +865,7 @@
1169
  "single_word": false,
1170
  "special": false
1171
  },
1172
- "50387": {
1173
  "content": "<reserved_5>",
1174
  "lstrip": false,
1175
  "normalized": true,
@@ -1177,7 +873,7 @@
1177
  "single_word": false,
1178
  "special": false
1179
  },
1180
- "50388": {
1181
  "content": "<reserved_6>",
1182
  "lstrip": false,
1183
  "normalized": true,
@@ -1185,7 +881,7 @@
1185
  "single_word": false,
1186
  "special": false
1187
  },
1188
- "50389": {
1189
  "content": "<reserved_7>",
1190
  "lstrip": false,
1191
  "normalized": true,
@@ -1193,7 +889,7 @@
1193
  "single_word": false,
1194
  "special": false
1195
  },
1196
- "50390": {
1197
  "content": "<reserved_8>",
1198
  "lstrip": false,
1199
  "normalized": true,
@@ -1201,7 +897,7 @@
1201
  "single_word": false,
1202
  "special": false
1203
  },
1204
- "50391": {
1205
  "content": "<reserved_9>",
1206
  "lstrip": false,
1207
  "normalized": true,
@@ -1209,7 +905,7 @@
1209
  "single_word": false,
1210
  "special": false
1211
  },
1212
- "50392": {
1213
  "content": "<reserved_10>",
1214
  "lstrip": false,
1215
  "normalized": true,
@@ -1217,7 +913,7 @@
1217
  "single_word": false,
1218
  "special": false
1219
  },
1220
- "50393": {
1221
  "content": "<reserved_11>",
1222
  "lstrip": false,
1223
  "normalized": true,
@@ -1225,7 +921,7 @@
1225
  "single_word": false,
1226
  "special": false
1227
  },
1228
- "50394": {
1229
  "content": "<reserved_12>",
1230
  "lstrip": false,
1231
  "normalized": true,
@@ -1233,7 +929,7 @@
1233
  "single_word": false,
1234
  "special": false
1235
  },
1236
- "50395": {
1237
  "content": "<reserved_13>",
1238
  "lstrip": false,
1239
  "normalized": true,
@@ -1241,7 +937,7 @@
1241
  "single_word": false,
1242
  "special": false
1243
  },
1244
- "50396": {
1245
  "content": "<reserved_14>",
1246
  "lstrip": false,
1247
  "normalized": true,
@@ -1249,7 +945,7 @@
1249
  "single_word": false,
1250
  "special": false
1251
  },
1252
- "50397": {
1253
  "content": "<reserved_15>",
1254
  "lstrip": false,
1255
  "normalized": true,
@@ -1257,7 +953,7 @@
1257
  "single_word": false,
1258
  "special": false
1259
  },
1260
- "50398": {
1261
  "content": "<reserved_16>",
1262
  "lstrip": false,
1263
  "normalized": true,
@@ -1265,7 +961,7 @@
1265
  "single_word": false,
1266
  "special": false
1267
  },
1268
- "50399": {
1269
  "content": "<reserved_17>",
1270
  "lstrip": false,
1271
  "normalized": true,
@@ -1273,7 +969,7 @@
1273
  "single_word": false,
1274
  "special": false
1275
  },
1276
- "50400": {
1277
  "content": "<reserved_18>",
1278
  "lstrip": false,
1279
  "normalized": true,
@@ -1281,7 +977,7 @@
1281
  "single_word": false,
1282
  "special": false
1283
  },
1284
- "50401": {
1285
  "content": "<reserved_19>",
1286
  "lstrip": false,
1287
  "normalized": true,
@@ -1289,7 +985,7 @@
1289
  "single_word": false,
1290
  "special": false
1291
  },
1292
- "50402": {
1293
  "content": "<reserved_20>",
1294
  "lstrip": false,
1295
  "normalized": true,
@@ -1297,7 +993,7 @@
1297
  "single_word": false,
1298
  "special": false
1299
  },
1300
- "50403": {
1301
  "content": "<reserved_21>",
1302
  "lstrip": false,
1303
  "normalized": true,
@@ -1305,7 +1001,7 @@
1305
  "single_word": false,
1306
  "special": false
1307
  },
1308
- "50404": {
1309
  "content": "<reserved_22>",
1310
  "lstrip": false,
1311
  "normalized": true,
@@ -1313,263 +1009,17 @@
1313
  "single_word": false,
1314
  "special": false
1315
  },
1316
- "50405": {
1317
  "content": "<reserved_23>",
1318
  "lstrip": false,
1319
  "normalized": true,
1320
  "rstrip": false,
1321
  "single_word": false,
1322
  "special": false
1323
- },
1324
- "50406": {
1325
- "content": "<reserved_24>",
1326
- "lstrip": false,
1327
- "normalized": true,
1328
- "rstrip": false,
1329
- "single_word": false,
1330
- "special": false
1331
- },
1332
- "50407": {
1333
- "content": "<reserved_25>",
1334
- "lstrip": false,
1335
- "normalized": true,
1336
- "rstrip": false,
1337
- "single_word": false,
1338
- "special": false
1339
- },
1340
- "50408": {
1341
- "content": "<reserved_26>",
1342
- "lstrip": false,
1343
- "normalized": true,
1344
- "rstrip": false,
1345
- "single_word": false,
1346
- "special": false
1347
- },
1348
- "50409": {
1349
- "content": "<reserved_27>",
1350
- "lstrip": false,
1351
- "normalized": true,
1352
- "rstrip": false,
1353
- "single_word": false,
1354
- "special": false
1355
- },
1356
- "50410": {
1357
- "content": "<reserved_28>",
1358
- "lstrip": false,
1359
- "normalized": true,
1360
- "rstrip": false,
1361
- "single_word": false,
1362
- "special": false
1363
- },
1364
- "50411": {
1365
- "content": "<reserved_29>",
1366
- "lstrip": false,
1367
- "normalized": true,
1368
- "rstrip": false,
1369
- "single_word": false,
1370
- "special": false
1371
- },
1372
- "50412": {
1373
- "content": "<reserved_30>",
1374
- "lstrip": false,
1375
- "normalized": true,
1376
- "rstrip": false,
1377
- "single_word": false,
1378
- "special": false
1379
- },
1380
- "50413": {
1381
- "content": "<reserved_31>",
1382
- "lstrip": false,
1383
- "normalized": true,
1384
- "rstrip": false,
1385
- "single_word": false,
1386
- "special": false
1387
- },
1388
- "50414": {
1389
- "content": "<reserved_32>",
1390
- "lstrip": false,
1391
- "normalized": true,
1392
- "rstrip": false,
1393
- "single_word": false,
1394
- "special": false
1395
- },
1396
- "50415": {
1397
- "content": "<reserved_33>",
1398
- "lstrip": false,
1399
- "normalized": true,
1400
- "rstrip": false,
1401
- "single_word": false,
1402
- "special": false
1403
- },
1404
- "50416": {
1405
- "content": "<reserved_34>",
1406
- "lstrip": false,
1407
- "normalized": true,
1408
- "rstrip": false,
1409
- "single_word": false,
1410
- "special": false
1411
- },
1412
- "50417": {
1413
- "content": "<reserved_35>",
1414
- "lstrip": false,
1415
- "normalized": true,
1416
- "rstrip": false,
1417
- "single_word": false,
1418
- "special": false
1419
- },
1420
- "50418": {
1421
- "content": "<reserved_36>",
1422
- "lstrip": false,
1423
- "normalized": true,
1424
- "rstrip": false,
1425
- "single_word": false,
1426
- "special": false
1427
- },
1428
- "50419": {
1429
- "content": "<reserved_37>",
1430
- "lstrip": false,
1431
- "normalized": true,
1432
- "rstrip": false,
1433
- "single_word": false,
1434
- "special": false
1435
- },
1436
- "50420": {
1437
- "content": "<reserved_38>",
1438
- "lstrip": false,
1439
- "normalized": true,
1440
- "rstrip": false,
1441
- "single_word": false,
1442
- "special": false
1443
- },
1444
- "50421": {
1445
- "content": "<reserved_39>",
1446
- "lstrip": false,
1447
- "normalized": true,
1448
- "rstrip": false,
1449
- "single_word": false,
1450
- "special": false
1451
- },
1452
- "50422": {
1453
- "content": "<reserved_40>",
1454
- "lstrip": false,
1455
- "normalized": true,
1456
- "rstrip": false,
1457
- "single_word": false,
1458
- "special": false
1459
- },
1460
- "50423": {
1461
- "content": "<reserved_41>",
1462
- "lstrip": false,
1463
- "normalized": true,
1464
- "rstrip": false,
1465
- "single_word": false,
1466
- "special": false
1467
- },
1468
- "50424": {
1469
- "content": "<reserved_42>",
1470
- "lstrip": false,
1471
- "normalized": true,
1472
- "rstrip": false,
1473
- "single_word": false,
1474
- "special": false
1475
- },
1476
- "50425": {
1477
- "content": "<reserved_43>",
1478
- "lstrip": false,
1479
- "normalized": true,
1480
- "rstrip": false,
1481
- "single_word": false,
1482
- "special": false
1483
- },
1484
- "50426": {
1485
- "content": "<reserved_44>",
1486
- "lstrip": false,
1487
- "normalized": true,
1488
- "rstrip": false,
1489
- "single_word": false,
1490
- "special": false
1491
- },
1492
- "50427": {
1493
- "content": "<reserved_45>",
1494
- "lstrip": false,
1495
- "normalized": true,
1496
- "rstrip": false,
1497
- "single_word": false,
1498
- "special": false
1499
- },
1500
- "50428": {
1501
- "content": "<reserved_46>",
1502
- "lstrip": false,
1503
- "normalized": true,
1504
- "rstrip": false,
1505
- "single_word": false,
1506
- "special": false
1507
- },
1508
- "50429": {
1509
- "content": "<reserved_47>",
1510
- "lstrip": false,
1511
- "normalized": true,
1512
- "rstrip": false,
1513
- "single_word": false,
1514
- "special": false
1515
- },
1516
- "50430": {
1517
- "content": "<reserved_48>",
1518
- "lstrip": false,
1519
- "normalized": true,
1520
- "rstrip": false,
1521
- "single_word": false,
1522
- "special": false
1523
- },
1524
- "50431": {
1525
- "content": "<reserved_49>",
1526
- "lstrip": false,
1527
- "normalized": true,
1528
- "rstrip": false,
1529
- "single_word": false,
1530
- "special": false
1531
  }
1532
  },
1533
  "additional_special_tokens": [
1534
- "</s_BIC>",
1535
- "</s_supplier_register_id>",
1536
- "</s_customer_name>",
1537
- "<s_total_price>",
1538
- "<s_supplier_tax_id>",
1539
- "</s_total_price>",
1540
- "<s_variable_symbol>",
1541
- "<s_supplier_register_id>",
1542
- "<s_customer_name>",
1543
- "</s_bank_account_number>",
1544
- "<s_invoice_number>",
1545
- "<s_customer_tax_id>",
1546
- "</s_issue_date>",
1547
- "<s_supplier_name>",
1548
- "<s_issue_date>",
1549
- "</s_customer_tax_id>",
1550
- "</s_currency>",
1551
- "<parsing>",
1552
- "<s_currency>",
1553
- "<s_IBAN>",
1554
- "</s_supplier_tax_id>",
1555
- "</s_invoice_number>",
1556
- "</s_supplier_name>",
1557
- "<s_bank_account_number>",
1558
- "<s_const_symbol>",
1559
- "</s_const_symbol>",
1560
- "</s_total_vat>",
1561
- "</s_variable_symbol>",
1562
- "<s_BIC>",
1563
- "</s_taxable_supply_date>",
1564
- "<s_taxable_supply_date>",
1565
- "<s_customer_register_id>",
1566
- "<s_total_vat>",
1567
- "</s_payment>",
1568
- "</s_due_date>",
1569
- "<s_payment>",
1570
- "</s_IBAN>",
1571
- "<s_due_date>",
1572
- "</s_customer_register_id>"
1573
  ],
1574
  "clean_up_tokenization_spaces": false,
1575
  "eos_token": "</s>",
 
826
  "special": true
827
  },
828
  "50344": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
  "content": "<parsing>",
830
  "lstrip": false,
831
  "normalized": false,
 
833
  "single_word": false,
834
  "special": true
835
  },
836
+ "50345": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
  "content": "<reserved_1>",
838
  "lstrip": false,
839
  "normalized": true,
 
841
  "single_word": false,
842
  "special": false
843
  },
844
+ "50346": {
845
  "content": "<reserved_2>",
846
  "lstrip": false,
847
  "normalized": true,
 
849
  "single_word": false,
850
  "special": false
851
  },
852
+ "50347": {
853
  "content": "<reserved_3>",
854
  "lstrip": false,
855
  "normalized": true,
 
857
  "single_word": false,
858
  "special": false
859
  },
860
+ "50348": {
861
  "content": "<reserved_4>",
862
  "lstrip": false,
863
  "normalized": true,
 
865
  "single_word": false,
866
  "special": false
867
  },
868
+ "50349": {
869
  "content": "<reserved_5>",
870
  "lstrip": false,
871
  "normalized": true,
 
873
  "single_word": false,
874
  "special": false
875
  },
876
+ "50350": {
877
  "content": "<reserved_6>",
878
  "lstrip": false,
879
  "normalized": true,
 
881
  "single_word": false,
882
  "special": false
883
  },
884
+ "50351": {
885
  "content": "<reserved_7>",
886
  "lstrip": false,
887
  "normalized": true,
 
889
  "single_word": false,
890
  "special": false
891
  },
892
+ "50352": {
893
  "content": "<reserved_8>",
894
  "lstrip": false,
895
  "normalized": true,
 
897
  "single_word": false,
898
  "special": false
899
  },
900
+ "50353": {
901
  "content": "<reserved_9>",
902
  "lstrip": false,
903
  "normalized": true,
 
905
  "single_word": false,
906
  "special": false
907
  },
908
+ "50354": {
909
  "content": "<reserved_10>",
910
  "lstrip": false,
911
  "normalized": true,
 
913
  "single_word": false,
914
  "special": false
915
  },
916
+ "50355": {
917
  "content": "<reserved_11>",
918
  "lstrip": false,
919
  "normalized": true,
 
921
  "single_word": false,
922
  "special": false
923
  },
924
+ "50356": {
925
  "content": "<reserved_12>",
926
  "lstrip": false,
927
  "normalized": true,
 
929
  "single_word": false,
930
  "special": false
931
  },
932
+ "50357": {
933
  "content": "<reserved_13>",
934
  "lstrip": false,
935
  "normalized": true,
 
937
  "single_word": false,
938
  "special": false
939
  },
940
+ "50358": {
941
  "content": "<reserved_14>",
942
  "lstrip": false,
943
  "normalized": true,
 
945
  "single_word": false,
946
  "special": false
947
  },
948
+ "50359": {
949
  "content": "<reserved_15>",
950
  "lstrip": false,
951
  "normalized": true,
 
953
  "single_word": false,
954
  "special": false
955
  },
956
+ "50360": {
957
  "content": "<reserved_16>",
958
  "lstrip": false,
959
  "normalized": true,
 
961
  "single_word": false,
962
  "special": false
963
  },
964
+ "50361": {
965
  "content": "<reserved_17>",
966
  "lstrip": false,
967
  "normalized": true,
 
969
  "single_word": false,
970
  "special": false
971
  },
972
+ "50362": {
973
  "content": "<reserved_18>",
974
  "lstrip": false,
975
  "normalized": true,
 
977
  "single_word": false,
978
  "special": false
979
  },
980
+ "50363": {
981
  "content": "<reserved_19>",
982
  "lstrip": false,
983
  "normalized": true,
 
985
  "single_word": false,
986
  "special": false
987
  },
988
+ "50364": {
989
  "content": "<reserved_20>",
990
  "lstrip": false,
991
  "normalized": true,
 
993
  "single_word": false,
994
  "special": false
995
  },
996
+ "50365": {
997
  "content": "<reserved_21>",
998
  "lstrip": false,
999
  "normalized": true,
 
1001
  "single_word": false,
1002
  "special": false
1003
  },
1004
+ "50366": {
1005
  "content": "<reserved_22>",
1006
  "lstrip": false,
1007
  "normalized": true,
 
1009
  "single_word": false,
1010
  "special": false
1011
  },
1012
+ "50367": {
1013
  "content": "<reserved_23>",
1014
  "lstrip": false,
1015
  "normalized": true,
1016
  "rstrip": false,
1017
  "single_word": false,
1018
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019
  }
1020
  },
1021
  "additional_special_tokens": [
1022
+ "<parsing>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
  ],
1024
  "clean_up_tokenization_spaces": false,
1025
  "eos_token": "</s>",