TomasFAV committed on
Commit
75a109a
·
verified ·
1 Parent(s): 6181123

End of training

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +34 -97
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +37 -39
special_tokens_map.json CHANGED
@@ -1,147 +1,147 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "<s_due_date>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
- "content": "</s_invoice_number>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
- "content": "<s_vat>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
- "content": "<s_bank_account_number>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
- "content": "</s_total_price>",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
36
  "single_word": false
37
  },
38
  {
39
- "content": "</s_supplier_tax_id>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false
44
  },
45
  {
46
- "content": "<s_currency>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  {
53
- "content": "<s_variable_symbol>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
57
  "single_word": false
58
  },
59
  {
60
- "content": "<sep/>",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
  {
67
- "content": "</s_vat_items>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
  {
74
- "content": "<s_BIC>",
75
  "lstrip": false,
76
  "normalized": false,
77
  "rstrip": false,
78
  "single_word": false
79
  },
80
  {
81
- "content": "<s_taxable_supply_date>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
  },
87
  {
88
- "content": "<s_IBAN>",
89
  "lstrip": false,
90
  "normalized": false,
91
  "rstrip": false,
92
  "single_word": false
93
  },
94
  {
95
- "content": "<s_total_vat>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false
100
  },
101
  {
102
- "content": "</s_payment>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
106
  "single_word": false
107
  },
108
  {
109
- "content": "</s_taxable_supply_date>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
113
  "single_word": false
114
  },
115
  {
116
- "content": "<s_payment>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
- "content": "<s_vat_items>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
127
  "single_word": false
128
  },
129
  {
130
- "content": "</s_customer_name>",
131
  "lstrip": false,
132
  "normalized": false,
133
  "rstrip": false,
134
  "single_word": false
135
  },
136
  {
137
- "content": "<s_supplier_register_id>",
138
  "lstrip": false,
139
  "normalized": false,
140
  "rstrip": false,
141
  "single_word": false
142
  },
143
  {
144
- "content": "<s_customer_tax_id>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
@@ -154,13 +154,6 @@
154
  "rstrip": false,
155
  "single_word": false
156
  },
157
- {
158
- "content": "<s_customer_name>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false
163
- },
164
  {
165
  "content": "</s_due_date>",
166
  "lstrip": false,
@@ -169,42 +162,14 @@
169
  "single_word": false
170
  },
171
  {
172
- "content": "</s_vat>",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false
177
- },
178
- {
179
- "content": "</s_total_vat>",
180
- "lstrip": false,
181
- "normalized": false,
182
- "rstrip": false,
183
- "single_word": false
184
- },
185
- {
186
- "content": "</s_supplier_register_id>",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false
191
- },
192
- {
193
- "content": "</s_vat_percentage>",
194
- "lstrip": false,
195
- "normalized": false,
196
- "rstrip": false,
197
- "single_word": false
198
- },
199
- {
200
- "content": "<s_supplier_tax_id>",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
204
  "single_word": false
205
  },
206
  {
207
- "content": "<s_vat_base>",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
@@ -218,7 +183,7 @@
218
  "single_word": false
219
  },
220
  {
221
- "content": "<s_vat_percentage>",
222
  "lstrip": false,
223
  "normalized": false,
224
  "rstrip": false,
@@ -232,42 +197,42 @@
232
  "single_word": false
233
  },
234
  {
235
- "content": "</s_customer_register_id>",
236
  "lstrip": false,
237
  "normalized": false,
238
  "rstrip": false,
239
  "single_word": false
240
  },
241
  {
242
- "content": "<s_supplier_name>",
243
  "lstrip": false,
244
  "normalized": false,
245
  "rstrip": false,
246
  "single_word": false
247
  },
248
  {
249
- "content": "</s_bank_account_number>",
250
  "lstrip": false,
251
  "normalized": false,
252
  "rstrip": false,
253
  "single_word": false
254
  },
255
  {
256
- "content": "<parsing>",
257
  "lstrip": false,
258
  "normalized": false,
259
  "rstrip": false,
260
  "single_word": false
261
  },
262
  {
263
- "content": "<s_invoice_number>",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
267
  "single_word": false
268
  },
269
  {
270
- "content": "<s_const_symbol>",
271
  "lstrip": false,
272
  "normalized": false,
273
  "rstrip": false,
@@ -281,56 +246,28 @@
281
  "single_word": false
282
  },
283
  {
284
- "content": "</s_customer_tax_id>",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false
289
- },
290
- {
291
- "content": "</s_currency>",
292
- "lstrip": false,
293
- "normalized": false,
294
- "rstrip": false,
295
- "single_word": false
296
- },
297
- {
298
- "content": "</s_const_symbol>",
299
- "lstrip": false,
300
- "normalized": false,
301
- "rstrip": false,
302
- "single_word": false
303
- },
304
- {
305
- "content": "<s_total_price>",
306
- "lstrip": false,
307
- "normalized": false,
308
- "rstrip": false,
309
- "single_word": false
310
- },
311
- {
312
- "content": "</s_vat_base>",
313
  "lstrip": false,
314
  "normalized": false,
315
  "rstrip": false,
316
  "single_word": false
317
  },
318
  {
319
- "content": "<s_issue_date>",
320
  "lstrip": false,
321
  "normalized": false,
322
  "rstrip": false,
323
  "single_word": false
324
  },
325
  {
326
- "content": "<s_customer_register_id>",
327
  "lstrip": false,
328
  "normalized": false,
329
  "rstrip": false,
330
  "single_word": false
331
  },
332
  {
333
- "content": "</s_issue_date>",
334
  "lstrip": false,
335
  "normalized": false,
336
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "</s_customer_name>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<s_supplier_name>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
+ "content": "<s_customer_name>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
+ "content": "<s_supplier_register_id>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
+ "content": "<s_supplier_tax_id>",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
36
  "single_word": false
37
  },
38
  {
39
+ "content": "</s_supplier_register_id>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false
44
  },
45
  {
46
+ "content": "</s_total_price>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  {
53
+ "content": "</s_total_vat>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
57
  "single_word": false
58
  },
59
  {
60
+ "content": "<parsing>",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
  {
67
+ "content": "</s_customer_register_id>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
  {
74
+ "content": "</s_taxable_supply_date>",
75
  "lstrip": false,
76
  "normalized": false,
77
  "rstrip": false,
78
  "single_word": false
79
  },
80
  {
81
+ "content": "</s_payment>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
  },
87
  {
88
+ "content": "<s_bank_account_number>",
89
  "lstrip": false,
90
  "normalized": false,
91
  "rstrip": false,
92
  "single_word": false
93
  },
94
  {
95
+ "content": "</s_const_symbol>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false
100
  },
101
  {
102
+ "content": "<s_issue_date>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
106
  "single_word": false
107
  },
108
  {
109
+ "content": "<s_currency>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
113
  "single_word": false
114
  },
115
  {
116
+ "content": "<s_customer_register_id>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
+ "content": "<s_total_vat>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
127
  "single_word": false
128
  },
129
  {
130
+ "content": "</s_bank_account_number>",
131
  "lstrip": false,
132
  "normalized": false,
133
  "rstrip": false,
134
  "single_word": false
135
  },
136
  {
137
+ "content": "<s_invoice_number>",
138
  "lstrip": false,
139
  "normalized": false,
140
  "rstrip": false,
141
  "single_word": false
142
  },
143
  {
144
+ "content": "</s_invoice_number>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
 
154
  "rstrip": false,
155
  "single_word": false
156
  },
 
 
 
 
 
 
 
157
  {
158
  "content": "</s_due_date>",
159
  "lstrip": false,
 
162
  "single_word": false
163
  },
164
  {
165
+ "content": "<s_variable_symbol>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,
169
  "single_word": false
170
  },
171
  {
172
+ "content": "</s_currency>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
183
  "single_word": false
184
  },
185
  {
186
+ "content": "</s_supplier_tax_id>",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
 
197
  "single_word": false
198
  },
199
  {
200
+ "content": "</s_customer_tax_id>",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
204
  "single_word": false
205
  },
206
  {
207
+ "content": "<s_total_price>",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
211
  "single_word": false
212
  },
213
  {
214
+ "content": "<s_payment>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
218
  "single_word": false
219
  },
220
  {
221
+ "content": "<s_taxable_supply_date>",
222
  "lstrip": false,
223
  "normalized": false,
224
  "rstrip": false,
225
  "single_word": false
226
  },
227
  {
228
+ "content": "<s_IBAN>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
232
  "single_word": false
233
  },
234
  {
235
+ "content": "<s_BIC>",
236
  "lstrip": false,
237
  "normalized": false,
238
  "rstrip": false,
 
246
  "single_word": false
247
  },
248
  {
249
+ "content": "<s_customer_tax_id>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  "lstrip": false,
251
  "normalized": false,
252
  "rstrip": false,
253
  "single_word": false
254
  },
255
  {
256
+ "content": "</s_issue_date>",
257
  "lstrip": false,
258
  "normalized": false,
259
  "rstrip": false,
260
  "single_word": false
261
  },
262
  {
263
+ "content": "<s_due_date>",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
267
  "single_word": false
268
  },
269
  {
270
+ "content": "<s_const_symbol>",
271
  "lstrip": false,
272
  "normalized": false,
273
  "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -666,54 +666,45 @@
666
  }
667
  },
668
  "additional_special_tokens": [
669
- "<s_due_date>",
670
- "</s_invoice_number>",
671
- "<s_vat>",
672
- "<s_bank_account_number>",
 
 
673
  "</s_total_price>",
674
- "</s_supplier_tax_id>",
 
 
 
 
 
 
 
675
  "<s_currency>",
676
- "<s_variable_symbol>",
677
- "<sep/>",
678
- "</s_vat_items>",
679
- "<s_BIC>",
680
- "<s_taxable_supply_date>",
681
- "<s_IBAN>",
682
  "<s_total_vat>",
683
- "</s_payment>",
684
- "</s_taxable_supply_date>",
685
- "<s_payment>",
686
- "<s_vat_items>",
687
- "</s_customer_name>",
688
- "<s_supplier_register_id>",
689
- "<s_customer_tax_id>",
690
  "</s_variable_symbol>",
691
- "<s_customer_name>",
692
  "</s_due_date>",
693
- "</s_vat>",
694
- "</s_total_vat>",
695
- "</s_supplier_register_id>",
696
- "</s_vat_percentage>",
697
- "<s_supplier_tax_id>",
698
- "<s_vat_base>",
699
  "</s_supplier_name>",
700
- "<s_vat_percentage>",
701
  "</s_IBAN>",
702
- "</s_customer_register_id>",
703
- "<s_supplier_name>",
704
- "</s_bank_account_number>",
705
- "<parsing>",
706
- "<s_invoice_number>",
707
- "<s_const_symbol>",
708
- "</s_BIC>",
709
  "</s_customer_tax_id>",
710
- "</s_currency>",
711
- "</s_const_symbol>",
712
  "<s_total_price>",
713
- "</s_vat_base>",
714
- "<s_issue_date>",
715
- "<s_customer_register_id>",
716
- "</s_issue_date>"
 
 
 
 
 
717
  ],
718
  "bos_token": "<s>",
719
  "clean_up_tokenization_spaces": false,
@@ -721,11 +712,18 @@
721
  "eos_token": "</s>",
722
  "extra_special_tokens": {},
723
  "mask_token": "<mask>",
 
724
  "model_max_length": 1000000000000000019884624838656,
 
725
  "pad_token": "<pad>",
 
 
726
  "processor_class": "DonutProcessor",
727
  "sep_token": "</s>",
728
  "sp_model_kwargs": {},
 
729
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
730
  "unk_token": "<unk>"
731
  }
 
666
  }
667
  },
668
  "additional_special_tokens": [
669
+ "</s_customer_name>",
670
+ "<s_supplier_name>",
671
+ "<s_customer_name>",
672
+ "<s_supplier_register_id>",
673
+ "<s_supplier_tax_id>",
674
+ "</s_supplier_register_id>",
675
  "</s_total_price>",
676
+ "</s_total_vat>",
677
+ "<parsing>",
678
+ "</s_customer_register_id>",
679
+ "</s_taxable_supply_date>",
680
+ "</s_payment>",
681
+ "<s_bank_account_number>",
682
+ "</s_const_symbol>",
683
+ "<s_issue_date>",
684
  "<s_currency>",
685
+ "<s_customer_register_id>",
 
 
 
 
 
686
  "<s_total_vat>",
687
+ "</s_bank_account_number>",
688
+ "<s_invoice_number>",
689
+ "</s_invoice_number>",
 
 
 
 
690
  "</s_variable_symbol>",
 
691
  "</s_due_date>",
692
+ "<s_variable_symbol>",
693
+ "</s_currency>",
 
 
 
 
694
  "</s_supplier_name>",
695
+ "</s_supplier_tax_id>",
696
  "</s_IBAN>",
 
 
 
 
 
 
 
697
  "</s_customer_tax_id>",
 
 
698
  "<s_total_price>",
699
+ "<s_payment>",
700
+ "<s_taxable_supply_date>",
701
+ "<s_IBAN>",
702
+ "<s_BIC>",
703
+ "</s_BIC>",
704
+ "<s_customer_tax_id>",
705
+ "</s_issue_date>",
706
+ "<s_due_date>",
707
+ "<s_const_symbol>"
708
  ],
709
  "bos_token": "<s>",
710
  "clean_up_tokenization_spaces": false,
 
712
  "eos_token": "</s>",
713
  "extra_special_tokens": {},
714
  "mask_token": "<mask>",
715
+ "max_length": 128,
716
  "model_max_length": 1000000000000000019884624838656,
717
+ "pad_to_multiple_of": null,
718
  "pad_token": "<pad>",
719
+ "pad_token_type_id": 0,
720
+ "padding_side": "right",
721
  "processor_class": "DonutProcessor",
722
  "sep_token": "</s>",
723
  "sp_model_kwargs": {},
724
+ "stride": 0,
725
  "tokenizer_class": "XLMRobertaTokenizer",
726
+ "truncation_side": "right",
727
+ "truncation_strategy": "longest_first",
728
  "unk_token": "<unk>"
729
  }