Files changed (3) hide show
  1. added_tokens.json +46 -0
  2. preprocessor_config.json +5 -5
  3. tokenizer.json +430 -2
added_tokens.json CHANGED
@@ -1,5 +1,51 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "<s_iitcdip>": 57523,
 
 
 
 
 
 
 
 
 
3
  "<s_synthdog>": 57524,
 
 
 
 
 
 
4
  "<sep/>": 57522
5
  }
 
1
  {
2
+ "</s_>": 57566,
3
+ "</s_cashprice>": 57550,
4
+ "</s_changeprice>": 57552,
5
+ "</s_cnt>": 57530,
6
+ "</s_creditcardprice>": 57564,
7
+ "</s_discount_price>": 57558,
8
+ "</s_discountprice>": 57570,
9
+ "</s_etc>": 57542,
10
+ "</s_menu>": 57526,
11
+ "</s_menuqty_cnt>": 57556,
12
+ "</s_menutype_cnt>": 57554,
13
+ "</s_nm>": 57528,
14
+ "</s_price>": 57532,
15
+ "</s_service_price>": 57538,
16
+ "</s_sub>": 57548,
17
+ "</s_sub_total>": 57534,
18
+ "</s_subtotal_price>": 57536,
19
+ "</s_tax_price>": 57540,
20
+ "</s_total>": 57544,
21
+ "</s_total_etc>": 57562,
22
+ "</s_total_price>": 57546,
23
+ "</s_unitprice>": 57560,
24
+ "</s_vatyn>": 57568,
25
+ "<s_>": 57565,
26
+ "<s_cashprice>": 57549,
27
+ "<s_changeprice>": 57551,
28
+ "<s_cnt>": 57529,
29
+ "<s_creditcardprice>": 57563,
30
+ "<s_discount_price>": 57557,
31
+ "<s_discountprice>": 57569,
32
+ "<s_etc>": 57541,
33
  "<s_iitcdip>": 57523,
34
+ "<s_menu>": 57525,
35
+ "<s_menuqty_cnt>": 57555,
36
+ "<s_menutype_cnt>": 57553,
37
+ "<s_nm>": 57527,
38
+ "<s_price>": 57531,
39
+ "<s_service_price>": 57537,
40
+ "<s_sub>": 57547,
41
+ "<s_sub_total>": 57533,
42
+ "<s_subtotal_price>": 57535,
43
  "<s_synthdog>": 57524,
44
+ "<s_tax_price>": 57539,
45
+ "<s_total>": 57543,
46
+ "<s_total_etc>": 57561,
47
+ "<s_total_price>": 57545,
48
+ "<s_unitprice>": 57559,
49
+ "<s_vatyn>": 57567,
50
  "<sep/>": 57522
51
  }
preprocessor_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "do_align_long_axis": true,
3
  "do_normalize": true,
4
  "do_pad": true,
5
  "do_rescale": true,
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": {
23
- "height": 2560,
24
- "width": 1920
25
- }
26
  }
 
1
  {
2
+ "do_align_long_axis": false,
3
  "do_normalize": true,
4
  "do_pad": true,
5
  "do_rescale": true,
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 960,
24
+ 1280
25
+ ]
26
  }
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -74,6 +88,420 @@
74
  "rstrip": false,
75
  "normalized": false,
76
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
78
  ],
79
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 768,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 768
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 1,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<pad>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
88
  "rstrip": false,
89
  "normalized": false,
90
  "special": true
91
+ },
92
+ {
93
+ "id": 57525,
94
+ "content": "<s_menu>",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": true,
99
+ "special": false
100
+ },
101
+ {
102
+ "id": 57526,
103
+ "content": "</s_menu>",
104
+ "single_word": false,
105
+ "lstrip": false,
106
+ "rstrip": false,
107
+ "normalized": true,
108
+ "special": false
109
+ },
110
+ {
111
+ "id": 57527,
112
+ "content": "<s_nm>",
113
+ "single_word": false,
114
+ "lstrip": false,
115
+ "rstrip": false,
116
+ "normalized": true,
117
+ "special": false
118
+ },
119
+ {
120
+ "id": 57528,
121
+ "content": "</s_nm>",
122
+ "single_word": false,
123
+ "lstrip": false,
124
+ "rstrip": false,
125
+ "normalized": true,
126
+ "special": false
127
+ },
128
+ {
129
+ "id": 57529,
130
+ "content": "<s_cnt>",
131
+ "single_word": false,
132
+ "lstrip": false,
133
+ "rstrip": false,
134
+ "normalized": true,
135
+ "special": false
136
+ },
137
+ {
138
+ "id": 57530,
139
+ "content": "</s_cnt>",
140
+ "single_word": false,
141
+ "lstrip": false,
142
+ "rstrip": false,
143
+ "normalized": true,
144
+ "special": false
145
+ },
146
+ {
147
+ "id": 57531,
148
+ "content": "<s_price>",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": true,
153
+ "special": false
154
+ },
155
+ {
156
+ "id": 57532,
157
+ "content": "</s_price>",
158
+ "single_word": false,
159
+ "lstrip": false,
160
+ "rstrip": false,
161
+ "normalized": true,
162
+ "special": false
163
+ },
164
+ {
165
+ "id": 57533,
166
+ "content": "<s_sub_total>",
167
+ "single_word": false,
168
+ "lstrip": false,
169
+ "rstrip": false,
170
+ "normalized": true,
171
+ "special": false
172
+ },
173
+ {
174
+ "id": 57534,
175
+ "content": "</s_sub_total>",
176
+ "single_word": false,
177
+ "lstrip": false,
178
+ "rstrip": false,
179
+ "normalized": true,
180
+ "special": false
181
+ },
182
+ {
183
+ "id": 57535,
184
+ "content": "<s_subtotal_price>",
185
+ "single_word": false,
186
+ "lstrip": false,
187
+ "rstrip": false,
188
+ "normalized": true,
189
+ "special": false
190
+ },
191
+ {
192
+ "id": 57536,
193
+ "content": "</s_subtotal_price>",
194
+ "single_word": false,
195
+ "lstrip": false,
196
+ "rstrip": false,
197
+ "normalized": true,
198
+ "special": false
199
+ },
200
+ {
201
+ "id": 57537,
202
+ "content": "<s_service_price>",
203
+ "single_word": false,
204
+ "lstrip": false,
205
+ "rstrip": false,
206
+ "normalized": true,
207
+ "special": false
208
+ },
209
+ {
210
+ "id": 57538,
211
+ "content": "</s_service_price>",
212
+ "single_word": false,
213
+ "lstrip": false,
214
+ "rstrip": false,
215
+ "normalized": true,
216
+ "special": false
217
+ },
218
+ {
219
+ "id": 57539,
220
+ "content": "<s_tax_price>",
221
+ "single_word": false,
222
+ "lstrip": false,
223
+ "rstrip": false,
224
+ "normalized": true,
225
+ "special": false
226
+ },
227
+ {
228
+ "id": 57540,
229
+ "content": "</s_tax_price>",
230
+ "single_word": false,
231
+ "lstrip": false,
232
+ "rstrip": false,
233
+ "normalized": true,
234
+ "special": false
235
+ },
236
+ {
237
+ "id": 57541,
238
+ "content": "<s_etc>",
239
+ "single_word": false,
240
+ "lstrip": false,
241
+ "rstrip": false,
242
+ "normalized": true,
243
+ "special": false
244
+ },
245
+ {
246
+ "id": 57542,
247
+ "content": "</s_etc>",
248
+ "single_word": false,
249
+ "lstrip": false,
250
+ "rstrip": false,
251
+ "normalized": true,
252
+ "special": false
253
+ },
254
+ {
255
+ "id": 57543,
256
+ "content": "<s_total>",
257
+ "single_word": false,
258
+ "lstrip": false,
259
+ "rstrip": false,
260
+ "normalized": true,
261
+ "special": false
262
+ },
263
+ {
264
+ "id": 57544,
265
+ "content": "</s_total>",
266
+ "single_word": false,
267
+ "lstrip": false,
268
+ "rstrip": false,
269
+ "normalized": true,
270
+ "special": false
271
+ },
272
+ {
273
+ "id": 57545,
274
+ "content": "<s_total_price>",
275
+ "single_word": false,
276
+ "lstrip": false,
277
+ "rstrip": false,
278
+ "normalized": true,
279
+ "special": false
280
+ },
281
+ {
282
+ "id": 57546,
283
+ "content": "</s_total_price>",
284
+ "single_word": false,
285
+ "lstrip": false,
286
+ "rstrip": false,
287
+ "normalized": true,
288
+ "special": false
289
+ },
290
+ {
291
+ "id": 57547,
292
+ "content": "<s_sub>",
293
+ "single_word": false,
294
+ "lstrip": false,
295
+ "rstrip": false,
296
+ "normalized": true,
297
+ "special": false
298
+ },
299
+ {
300
+ "id": 57548,
301
+ "content": "</s_sub>",
302
+ "single_word": false,
303
+ "lstrip": false,
304
+ "rstrip": false,
305
+ "normalized": true,
306
+ "special": false
307
+ },
308
+ {
309
+ "id": 57549,
310
+ "content": "<s_cashprice>",
311
+ "single_word": false,
312
+ "lstrip": false,
313
+ "rstrip": false,
314
+ "normalized": true,
315
+ "special": false
316
+ },
317
+ {
318
+ "id": 57550,
319
+ "content": "</s_cashprice>",
320
+ "single_word": false,
321
+ "lstrip": false,
322
+ "rstrip": false,
323
+ "normalized": true,
324
+ "special": false
325
+ },
326
+ {
327
+ "id": 57551,
328
+ "content": "<s_changeprice>",
329
+ "single_word": false,
330
+ "lstrip": false,
331
+ "rstrip": false,
332
+ "normalized": true,
333
+ "special": false
334
+ },
335
+ {
336
+ "id": 57552,
337
+ "content": "</s_changeprice>",
338
+ "single_word": false,
339
+ "lstrip": false,
340
+ "rstrip": false,
341
+ "normalized": true,
342
+ "special": false
343
+ },
344
+ {
345
+ "id": 57553,
346
+ "content": "<s_menutype_cnt>",
347
+ "single_word": false,
348
+ "lstrip": false,
349
+ "rstrip": false,
350
+ "normalized": true,
351
+ "special": false
352
+ },
353
+ {
354
+ "id": 57554,
355
+ "content": "</s_menutype_cnt>",
356
+ "single_word": false,
357
+ "lstrip": false,
358
+ "rstrip": false,
359
+ "normalized": true,
360
+ "special": false
361
+ },
362
+ {
363
+ "id": 57555,
364
+ "content": "<s_menuqty_cnt>",
365
+ "single_word": false,
366
+ "lstrip": false,
367
+ "rstrip": false,
368
+ "normalized": true,
369
+ "special": false
370
+ },
371
+ {
372
+ "id": 57556,
373
+ "content": "</s_menuqty_cnt>",
374
+ "single_word": false,
375
+ "lstrip": false,
376
+ "rstrip": false,
377
+ "normalized": true,
378
+ "special": false
379
+ },
380
+ {
381
+ "id": 57557,
382
+ "content": "<s_discount_price>",
383
+ "single_word": false,
384
+ "lstrip": false,
385
+ "rstrip": false,
386
+ "normalized": true,
387
+ "special": false
388
+ },
389
+ {
390
+ "id": 57558,
391
+ "content": "</s_discount_price>",
392
+ "single_word": false,
393
+ "lstrip": false,
394
+ "rstrip": false,
395
+ "normalized": true,
396
+ "special": false
397
+ },
398
+ {
399
+ "id": 57559,
400
+ "content": "<s_unitprice>",
401
+ "single_word": false,
402
+ "lstrip": false,
403
+ "rstrip": false,
404
+ "normalized": true,
405
+ "special": false
406
+ },
407
+ {
408
+ "id": 57560,
409
+ "content": "</s_unitprice>",
410
+ "single_word": false,
411
+ "lstrip": false,
412
+ "rstrip": false,
413
+ "normalized": true,
414
+ "special": false
415
+ },
416
+ {
417
+ "id": 57561,
418
+ "content": "<s_total_etc>",
419
+ "single_word": false,
420
+ "lstrip": false,
421
+ "rstrip": false,
422
+ "normalized": true,
423
+ "special": false
424
+ },
425
+ {
426
+ "id": 57562,
427
+ "content": "</s_total_etc>",
428
+ "single_word": false,
429
+ "lstrip": false,
430
+ "rstrip": false,
431
+ "normalized": true,
432
+ "special": false
433
+ },
434
+ {
435
+ "id": 57563,
436
+ "content": "<s_creditcardprice>",
437
+ "single_word": false,
438
+ "lstrip": false,
439
+ "rstrip": false,
440
+ "normalized": true,
441
+ "special": false
442
+ },
443
+ {
444
+ "id": 57564,
445
+ "content": "</s_creditcardprice>",
446
+ "single_word": false,
447
+ "lstrip": false,
448
+ "rstrip": false,
449
+ "normalized": true,
450
+ "special": false
451
+ },
452
+ {
453
+ "id": 57565,
454
+ "content": "<s_>",
455
+ "single_word": false,
456
+ "lstrip": false,
457
+ "rstrip": false,
458
+ "normalized": true,
459
+ "special": false
460
+ },
461
+ {
462
+ "id": 57566,
463
+ "content": "</s_>",
464
+ "single_word": false,
465
+ "lstrip": false,
466
+ "rstrip": false,
467
+ "normalized": true,
468
+ "special": false
469
+ },
470
+ {
471
+ "id": 57567,
472
+ "content": "<s_vatyn>",
473
+ "single_word": false,
474
+ "lstrip": false,
475
+ "rstrip": false,
476
+ "normalized": true,
477
+ "special": false
478
+ },
479
+ {
480
+ "id": 57568,
481
+ "content": "</s_vatyn>",
482
+ "single_word": false,
483
+ "lstrip": false,
484
+ "rstrip": false,
485
+ "normalized": true,
486
+ "special": false
487
+ },
488
+ {
489
+ "id": 57569,
490
+ "content": "<s_discountprice>",
491
+ "single_word": false,
492
+ "lstrip": false,
493
+ "rstrip": false,
494
+ "normalized": true,
495
+ "special": false
496
+ },
497
+ {
498
+ "id": 57570,
499
+ "content": "</s_discountprice>",
500
+ "single_word": false,
501
+ "lstrip": false,
502
+ "rstrip": false,
503
+ "normalized": true,
504
+ "special": false
505
  }
506
  ],
507
  "normalizer": {