TomasFAV committed on
Commit
d2223e5
·
verified ·
1 Parent(s): c44a185

End of training

Browse files
README.md CHANGED
@@ -1,7 +1,11 @@
1
  ---
2
  library_name: transformers
 
 
3
  tags:
4
  - generated_from_trainer
 
 
5
  model-index:
6
  - name: DonutInvoiceCzechTrainer
7
  results: []
@@ -12,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
12
 
13
  # DonutInvoiceCzechTrainer
14
 
15
- This model was trained from scratch on an unknown dataset.
16
 
17
  ## Model description
18
 
@@ -32,12 +36,12 @@ More information needed
32
 
33
  The following hyperparameters were used during training:
34
  - learning_rate: 2e-05
35
- - train_batch_size: 5
36
- - eval_batch_size: 1
37
  - seed: 42
38
  - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
39
  - lr_scheduler_type: linear
40
- - num_epochs: 7
41
  - mixed_precision_training: Native AMP
42
 
43
  ### Training results
 
1
  ---
2
  library_name: transformers
3
+ license: mit
4
+ base_model: naver-clova-ix/donut-base
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - imagefolder
9
  model-index:
10
  - name: DonutInvoiceCzechTrainer
11
  results: []
 
16
 
17
  # DonutInvoiceCzechTrainer
18
 
19
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 2e-05
39
+ - train_batch_size: 1
40
+ - eval_batch_size: 8
41
  - seed: 42
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
+ - num_epochs: 3
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
added_tokens.json CHANGED
@@ -1,52 +1,13 @@
1
  {
2
- "</s_BIC>": 57525,
3
- "</s_IBAN>": 57557,
4
- "</s_bank_account_number>": 57537,
5
- "</s_const_symbol>": 57529,
6
- "</s_currency>": 57541,
7
- "</s_customer_name>": 57560,
8
- "</s_customer_register_id>": 57554,
9
- "</s_customer_tax_id>": 57531,
10
- "</s_due_date>": 57561,
11
- "</s_invoice_number>": 57548,
12
- "</s_issue_date>": 57569,
13
- "</s_payment>": 57540,
14
- "</s_supplier_name>": 57533,
15
- "</s_supplier_register_id>": 57551,
16
- "</s_supplier_tax_id>": 57542,
17
- "</s_taxable_supply_date>": 57532,
18
- "</s_total_price>": 57538,
19
- "</s_total_vat>": 57568,
20
- "</s_variable_symbol>": 57566,
21
- "</s_vat>": 57530,
22
- "</s_vat_base>": 57562,
23
- "</s_vat_items>": 57536,
24
- "</s_vat_percentage>": 57563,
25
- "<s_BIC>": 57526,
26
- "<s_IBAN>": 57528,
27
- "<s_bank_account_number>": 57565,
28
- "<s_const_symbol>": 57552,
29
- "<s_cord-v2>": 57571,
30
- "<s_currency>": 57535,
31
- "<s_customer_name>": 57539,
32
- "<s_customer_register_id>": 57544,
33
- "<s_customer_tax_id>": 57549,
34
- "<s_due_date>": 57527,
35
  "<s_iitcdip>": 57523,
36
- "<s_invoice_number>": 57556,
37
- "<s_issue_date>": 57567,
38
- "<s_payment>": 57547,
39
- "<s_supplier_name>": 57558,
40
- "<s_supplier_register_id>": 57559,
41
- "<s_supplier_tax_id>": 57545,
42
  "<s_synthdog>": 57524,
43
- "<s_taxable_supply_date>": 57553,
44
- "<s_total_price>": 57564,
45
- "<s_total_vat>": 57550,
46
- "<s_variable_symbol>": 57570,
47
- "<s_vat>": 57543,
48
- "<s_vat_base>": 57546,
49
- "<s_vat_items>": 57534,
50
- "<s_vat_percentage>": 57555,
51
  "<sep/>": 57522
52
  }
 
1
  {
2
+ "</s_address>": 57532,
3
+ "</s_company>": 57530,
4
+ "</s_date>": 57528,
5
+ "</s_total>": 57526,
6
+ "<s_address>": 57531,
7
+ "<s_company>": 57529,
8
+ "<s_date>": 57527,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "<s_iitcdip>": 57523,
 
 
 
 
 
 
10
  "<s_synthdog>": 57524,
11
+ "<s_total>": 57525,
 
 
 
 
 
 
 
12
  "<sep/>": 57522
13
  }
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": {
23
- "height": 2338,
24
- "width": 1654
25
- }
26
  }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 720,
24
+ 960
25
+ ]
26
  }
special_tokens_map.json CHANGED
@@ -1,7 +1,75 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_iitcdip>",
4
- "<s_synthdog>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<s_total>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s_total>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<s_date>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s_date>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s_company>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s_company>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<s_address>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s_address>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ }
73
  ],
74
  "bos_token": {
75
  "content": "<s>",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -65,385 +65,81 @@
65
  "special": true
66
  },
67
  "57525": {
68
- "content": "</s_BIC>",
69
  "lstrip": false,
70
- "normalized": true,
71
  "rstrip": false,
72
  "single_word": false,
73
- "special": false
74
  },
75
  "57526": {
76
- "content": "<s_BIC>",
77
  "lstrip": false,
78
- "normalized": true,
79
  "rstrip": false,
80
  "single_word": false,
81
- "special": false
82
  },
83
  "57527": {
84
- "content": "<s_due_date>",
85
  "lstrip": false,
86
- "normalized": true,
87
  "rstrip": false,
88
  "single_word": false,
89
- "special": false
90
  },
91
  "57528": {
92
- "content": "<s_IBAN>",
93
  "lstrip": false,
94
- "normalized": true,
95
  "rstrip": false,
96
  "single_word": false,
97
- "special": false
98
  },
99
  "57529": {
100
- "content": "</s_const_symbol>",
101
  "lstrip": false,
102
- "normalized": true,
103
  "rstrip": false,
104
  "single_word": false,
105
- "special": false
106
  },
107
  "57530": {
108
- "content": "</s_vat>",
109
  "lstrip": false,
110
- "normalized": true,
111
  "rstrip": false,
112
  "single_word": false,
113
- "special": false
114
  },
115
  "57531": {
116
- "content": "</s_customer_tax_id>",
117
  "lstrip": false,
118
- "normalized": true,
119
  "rstrip": false,
120
  "single_word": false,
121
- "special": false
122
  },
123
  "57532": {
124
- "content": "</s_taxable_supply_date>",
125
- "lstrip": false,
126
- "normalized": true,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": false
130
- },
131
- "57533": {
132
- "content": "</s_supplier_name>",
133
- "lstrip": false,
134
- "normalized": true,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": false
138
- },
139
- "57534": {
140
- "content": "<s_vat_items>",
141
- "lstrip": false,
142
- "normalized": true,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": false
146
- },
147
- "57535": {
148
- "content": "<s_currency>",
149
- "lstrip": false,
150
- "normalized": true,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": false
154
- },
155
- "57536": {
156
- "content": "</s_vat_items>",
157
- "lstrip": false,
158
- "normalized": true,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": false
162
- },
163
- "57537": {
164
- "content": "</s_bank_account_number>",
165
- "lstrip": false,
166
- "normalized": true,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "57538": {
172
- "content": "</s_total_price>",
173
- "lstrip": false,
174
- "normalized": true,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": false
178
- },
179
- "57539": {
180
- "content": "<s_customer_name>",
181
- "lstrip": false,
182
- "normalized": true,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": false
186
- },
187
- "57540": {
188
- "content": "</s_payment>",
189
- "lstrip": false,
190
- "normalized": true,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": false
194
- },
195
- "57541": {
196
- "content": "</s_currency>",
197
- "lstrip": false,
198
- "normalized": true,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": false
202
- },
203
- "57542": {
204
- "content": "</s_supplier_tax_id>",
205
- "lstrip": false,
206
- "normalized": true,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": false
210
- },
211
- "57543": {
212
- "content": "<s_vat>",
213
- "lstrip": false,
214
- "normalized": true,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": false
218
- },
219
- "57544": {
220
- "content": "<s_customer_register_id>",
221
- "lstrip": false,
222
- "normalized": true,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": false
226
- },
227
- "57545": {
228
- "content": "<s_supplier_tax_id>",
229
- "lstrip": false,
230
- "normalized": true,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": false
234
- },
235
- "57546": {
236
- "content": "<s_vat_base>",
237
- "lstrip": false,
238
- "normalized": true,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": false
242
- },
243
- "57547": {
244
- "content": "<s_payment>",
245
- "lstrip": false,
246
- "normalized": true,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": false
250
- },
251
- "57548": {
252
- "content": "</s_invoice_number>",
253
- "lstrip": false,
254
- "normalized": true,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": false
258
- },
259
- "57549": {
260
- "content": "<s_customer_tax_id>",
261
- "lstrip": false,
262
- "normalized": true,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": false
266
- },
267
- "57550": {
268
- "content": "<s_total_vat>",
269
- "lstrip": false,
270
- "normalized": true,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": false
274
- },
275
- "57551": {
276
- "content": "</s_supplier_register_id>",
277
- "lstrip": false,
278
- "normalized": true,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": false
282
- },
283
- "57552": {
284
- "content": "<s_const_symbol>",
285
- "lstrip": false,
286
- "normalized": true,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": false
290
- },
291
- "57553": {
292
- "content": "<s_taxable_supply_date>",
293
  "lstrip": false,
294
- "normalized": true,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": false
298
- },
299
- "57554": {
300
- "content": "</s_customer_register_id>",
301
- "lstrip": false,
302
- "normalized": true,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": false
306
- },
307
- "57555": {
308
- "content": "<s_vat_percentage>",
309
- "lstrip": false,
310
- "normalized": true,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": false
314
- },
315
- "57556": {
316
- "content": "<s_invoice_number>",
317
- "lstrip": false,
318
- "normalized": true,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": false
322
- },
323
- "57557": {
324
- "content": "</s_IBAN>",
325
- "lstrip": false,
326
- "normalized": true,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": false
330
- },
331
- "57558": {
332
- "content": "<s_supplier_name>",
333
- "lstrip": false,
334
- "normalized": true,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": false
338
- },
339
- "57559": {
340
- "content": "<s_supplier_register_id>",
341
- "lstrip": false,
342
- "normalized": true,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": false
346
- },
347
- "57560": {
348
- "content": "</s_customer_name>",
349
- "lstrip": false,
350
- "normalized": true,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": false
354
- },
355
- "57561": {
356
- "content": "</s_due_date>",
357
- "lstrip": false,
358
- "normalized": true,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": false
362
- },
363
- "57562": {
364
- "content": "</s_vat_base>",
365
- "lstrip": false,
366
- "normalized": true,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": false
370
- },
371
- "57563": {
372
- "content": "</s_vat_percentage>",
373
- "lstrip": false,
374
- "normalized": true,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": false
378
- },
379
- "57564": {
380
- "content": "<s_total_price>",
381
- "lstrip": false,
382
- "normalized": true,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": false
386
- },
387
- "57565": {
388
- "content": "<s_bank_account_number>",
389
- "lstrip": false,
390
- "normalized": true,
391
- "rstrip": false,
392
- "single_word": false,
393
- "special": false
394
- },
395
- "57566": {
396
- "content": "</s_variable_symbol>",
397
- "lstrip": false,
398
- "normalized": true,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": false
402
- },
403
- "57567": {
404
- "content": "<s_issue_date>",
405
- "lstrip": false,
406
- "normalized": true,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": false
410
- },
411
- "57568": {
412
- "content": "</s_total_vat>",
413
- "lstrip": false,
414
- "normalized": true,
415
- "rstrip": false,
416
- "single_word": false,
417
- "special": false
418
- },
419
- "57569": {
420
- "content": "</s_issue_date>",
421
- "lstrip": false,
422
- "normalized": true,
423
- "rstrip": false,
424
- "single_word": false,
425
- "special": false
426
- },
427
- "57570": {
428
- "content": "<s_variable_symbol>",
429
- "lstrip": false,
430
- "normalized": true,
431
- "rstrip": false,
432
- "single_word": false,
433
- "special": false
434
- },
435
- "57571": {
436
- "content": "<s_cord-v2>",
437
- "lstrip": false,
438
- "normalized": true,
439
  "rstrip": false,
440
  "single_word": false,
441
- "special": false
442
  }
443
  },
444
  "additional_special_tokens": [
445
- "<s_iitcdip>",
446
- "<s_synthdog>"
 
 
 
 
 
 
 
 
447
  ],
448
  "bos_token": "<s>",
449
  "clean_up_tokenization_spaces": false,
@@ -451,18 +147,11 @@
451
  "eos_token": "</s>",
452
  "extra_special_tokens": {},
453
  "mask_token": "<mask>",
454
- "max_length": 512,
455
  "model_max_length": 1000000000000000019884624838656,
456
- "pad_to_multiple_of": null,
457
  "pad_token": "<pad>",
458
- "pad_token_type_id": 0,
459
- "padding_side": "right",
460
  "processor_class": "DonutProcessor",
461
  "sep_token": "</s>",
462
  "sp_model_kwargs": {},
463
- "stride": 0,
464
  "tokenizer_class": "XLMRobertaTokenizer",
465
- "truncation_side": "right",
466
- "truncation_strategy": "longest_first",
467
  "unk_token": "<unk>"
468
  }
 
65
  "special": true
66
  },
67
  "57525": {
68
+ "content": "<s_total>",
69
  "lstrip": false,
70
+ "normalized": false,
71
  "rstrip": false,
72
  "single_word": false,
73
+ "special": true
74
  },
75
  "57526": {
76
+ "content": "</s_total>",
77
  "lstrip": false,
78
+ "normalized": false,
79
  "rstrip": false,
80
  "single_word": false,
81
+ "special": true
82
  },
83
  "57527": {
84
+ "content": "<s_date>",
85
  "lstrip": false,
86
+ "normalized": false,
87
  "rstrip": false,
88
  "single_word": false,
89
+ "special": true
90
  },
91
  "57528": {
92
+ "content": "</s_date>",
93
  "lstrip": false,
94
+ "normalized": false,
95
  "rstrip": false,
96
  "single_word": false,
97
+ "special": true
98
  },
99
  "57529": {
100
+ "content": "<s_company>",
101
  "lstrip": false,
102
+ "normalized": false,
103
  "rstrip": false,
104
  "single_word": false,
105
+ "special": true
106
  },
107
  "57530": {
108
+ "content": "</s_company>",
109
  "lstrip": false,
110
+ "normalized": false,
111
  "rstrip": false,
112
  "single_word": false,
113
+ "special": true
114
  },
115
  "57531": {
116
+ "content": "<s_address>",
117
  "lstrip": false,
118
+ "normalized": false,
119
  "rstrip": false,
120
  "single_word": false,
121
+ "special": true
122
  },
123
  "57532": {
124
+ "content": "</s_address>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "lstrip": false,
126
+ "normalized": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  "rstrip": false,
128
  "single_word": false,
129
+ "special": true
130
  }
131
  },
132
  "additional_special_tokens": [
133
+ "<s_total>",
134
+ "</s_total>",
135
+ "<s_date>",
136
+ "</s_date>",
137
+ "<s_company>",
138
+ "</s_company>",
139
+ "<s_address>",
140
+ "</s_address>",
141
+ "<s>",
142
+ "</s>"
143
  ],
144
  "bos_token": "<s>",
145
  "clean_up_tokenization_spaces": false,
 
147
  "eos_token": "</s>",
148
  "extra_special_tokens": {},
149
  "mask_token": "<mask>",
 
150
  "model_max_length": 1000000000000000019884624838656,
 
151
  "pad_token": "<pad>",
 
 
152
  "processor_class": "DonutProcessor",
153
  "sep_token": "</s>",
154
  "sp_model_kwargs": {},
 
155
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
156
  "unk_token": "<unk>"
157
  }