Edgar404 committed on
Commit
810d63c
·
verified ·
1 Parent(s): 3e01fe3

Training done

Browse files
added_tokens.json CHANGED
@@ -1,14 +1,16 @@
1
  {
2
- "</s_address>": 57532,
3
- "</s_company>": 57530,
4
- "</s_date>": 57528,
5
- "</s_total>": 57526,
6
- "<s_address>": 57531,
7
- "<s_company>": 57529,
8
- "<s_cord-v2>": 57533,
9
- "<s_date>": 57527,
 
 
10
  "<s_iitcdip>": 57523,
 
11
  "<s_synthdog>": 57524,
12
- "<s_total>": 57525,
13
  "<sep/>": 57522
14
  }
 
1
  {
2
+ "</s_amt_in_figures>": 57528,
3
+ "</s_amt_in_words>": 57526,
4
+ "</s_bank_name>": 57534,
5
+ "</s_cheque_date>": 57532,
6
+ "</s_payee_name>": 57530,
7
+ "<s_amt_in_figures>": 57527,
8
+ "<s_amt_in_words>": 57525,
9
+ "<s_bank_name>": 57533,
10
+ "<s_cheque_date>": 57531,
11
+ "<s_cord-v2>": 57535,
12
  "<s_iitcdip>": 57523,
13
+ "<s_payee_name>": 57529,
14
  "<s_synthdog>": 57524,
 
15
  "<sep/>": 57522
16
  }
preprocessor_config.json CHANGED
@@ -37,8 +37,8 @@
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
- "size": {
41
- "height": 720,
42
- "width": 960
43
- }
44
  }
 
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
+ "size": [
41
+ 960,
42
+ 720
43
+ ]
44
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -65,7 +65,7 @@
65
  "special": true
66
  },
67
  "57525": {
68
- "content": "<s_total>",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": false
74
  },
75
  "57526": {
76
- "content": "</s_total>",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": false
82
  },
83
  "57527": {
84
- "content": "<s_date>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": false
90
  },
91
  "57528": {
92
- "content": "</s_date>",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": false
98
  },
99
  "57529": {
100
- "content": "<s_company>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": false
106
  },
107
  "57530": {
108
- "content": "</s_company>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": false
114
  },
115
  "57531": {
116
- "content": "<s_address>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": false
122
  },
123
  "57532": {
124
- "content": "</s_address>",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
@@ -129,6 +129,22 @@
129
  "special": false
130
  },
131
  "57533": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  "content": "<s_cord-v2>",
133
  "lstrip": false,
134
  "normalized": true,
@@ -146,18 +162,11 @@
146
  "cls_token": "<s>",
147
  "eos_token": "</s>",
148
  "mask_token": "<mask>",
149
- "max_length": 512,
150
  "model_max_length": 1000000000000000019884624838656,
151
- "pad_to_multiple_of": null,
152
  "pad_token": "<pad>",
153
- "pad_token_type_id": 0,
154
- "padding_side": "right",
155
  "processor_class": "DonutProcessor",
156
  "sep_token": "</s>",
157
  "sp_model_kwargs": {},
158
- "stride": 0,
159
  "tokenizer_class": "XLMRobertaTokenizer",
160
- "truncation_side": "right",
161
- "truncation_strategy": "longest_first",
162
  "unk_token": "<unk>"
163
  }
 
65
  "special": true
66
  },
67
  "57525": {
68
+ "content": "<s_amt_in_words>",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
 
73
  "special": false
74
  },
75
  "57526": {
76
+ "content": "</s_amt_in_words>",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
 
81
  "special": false
82
  },
83
  "57527": {
84
+ "content": "<s_amt_in_figures>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "57528": {
92
+ "content": "</s_amt_in_figures>",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "57529": {
100
+ "content": "<s_payee_name>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "57530": {
108
+ "content": "</s_payee_name>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
113
  "special": false
114
  },
115
  "57531": {
116
+ "content": "<s_cheque_date>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
121
  "special": false
122
  },
123
  "57532": {
124
+ "content": "</s_cheque_date>",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "57533": {
132
+ "content": "<s_bank_name>",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "57534": {
140
+ "content": "</s_bank_name>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "57535": {
148
  "content": "<s_cord-v2>",
149
  "lstrip": false,
150
  "normalized": true,
 
162
  "cls_token": "<s>",
163
  "eos_token": "</s>",
164
  "mask_token": "<mask>",
 
165
  "model_max_length": 1000000000000000019884624838656,
 
166
  "pad_token": "<pad>",
 
 
167
  "processor_class": "DonutProcessor",
168
  "sep_token": "</s>",
169
  "sp_model_kwargs": {},
 
170
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
171
  "unk_token": "<unk>"
172
  }