jeevan-23 committed on
Commit
7894184
·
1 Parent(s): 595a400

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. added_tokens.json +22 -0
  3. special_tokens_map.json +23 -1
  4. tokenizer.json +198 -0
  5. tokenizer_config.json +199 -1
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # jupy-model_v5
15
 
16
- This model was trained from scratch on the imagefolder dataset.
17
 
18
  ## Model description
19
 
 
1
  ---
2
+ license: mit
3
+ base_model: naver-clova-ix/donut-base
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
 
16
  # jupy-model_v5
17
 
18
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
 
20
  ## Model description
21
 
added_tokens.json CHANGED
@@ -1,5 +1,27 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "<s_iitcdip>": 57523,
 
 
 
3
  "<s_synthdog>": 57524,
 
4
  "<sep/>": 57522
5
  }
 
1
  {
2
+ "</s_EI_number>": 57545,
3
+ "</s_Exempt_FATCA_code>": 57539,
4
+ "</s_Exempt_pay_code>": 57538,
5
+ "</s_account_number>": 57528,
6
+ "</s_address>": 57526,
7
+ "</s_business_name>": 57531,
8
+ "</s_date>": 57525,
9
+ "</s_multi_line_address>": 57535,
10
+ "</s_name>": 57527,
11
+ "</s_ss_number>": 57534,
12
+ "</s_zip_address>": 57542,
13
+ "<s_EI_number>": 57533,
14
+ "<s_Exempt_FATCA_code>": 57532,
15
+ "<s_Exempt_pay_code>": 57544,
16
+ "<s_account_number>": 57546,
17
+ "<s_address>": 57540,
18
+ "<s_business_name>": 57529,
19
+ "<s_date>": 57536,
20
  "<s_iitcdip>": 57523,
21
+ "<s_multi_line_address>": 57530,
22
+ "<s_name>": 57543,
23
+ "<s_ss_number>": 57537,
24
  "<s_synthdog>": 57524,
25
+ "<s_zip_address>": 57541,
26
  "<sep/>": 57522
27
  }
special_tokens_map.json CHANGED
@@ -1,7 +1,29 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "</s>",
4
- "<s>"
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "</s_date>",
4
+ "</s_address>",
5
+ "</s_name>",
6
+ "</s_account_number>",
7
+ "<s_business_name>",
8
+ "<s_multi_line_address>",
9
+ "</s_business_name>",
10
+ "<s_Exempt_FATCA_code>",
11
+ "<s_EI_number>",
12
+ "</s_ss_number>",
13
+ "<s>",
14
+ "</s_multi_line_address>",
15
+ "<s_date>",
16
+ "<s_ss_number>",
17
+ "</s_Exempt_pay_code>",
18
+ "</s_Exempt_FATCA_code>",
19
+ "<s_address>",
20
+ "<s_zip_address>",
21
+ "</s_zip_address>",
22
+ "<s_name>",
23
  "</s>",
24
+ "<s_Exempt_pay_code>",
25
+ "</s_EI_number>",
26
+ "<s_account_number>"
27
  ],
28
  "bos_token": {
29
  "content": "<s>",
tokenizer.json CHANGED
@@ -88,6 +88,204 @@
88
  "rstrip": false,
89
  "normalized": false,
90
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
  ],
93
  "normalizer": {
 
88
  "rstrip": false,
89
  "normalized": false,
90
  "special": true
91
+ },
92
+ {
93
+ "id": 57525,
94
+ "content": "</s_date>",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": false,
99
+ "special": true
100
+ },
101
+ {
102
+ "id": 57526,
103
+ "content": "</s_address>",
104
+ "single_word": false,
105
+ "lstrip": false,
106
+ "rstrip": false,
107
+ "normalized": false,
108
+ "special": true
109
+ },
110
+ {
111
+ "id": 57527,
112
+ "content": "</s_name>",
113
+ "single_word": false,
114
+ "lstrip": false,
115
+ "rstrip": false,
116
+ "normalized": false,
117
+ "special": true
118
+ },
119
+ {
120
+ "id": 57528,
121
+ "content": "</s_account_number>",
122
+ "single_word": false,
123
+ "lstrip": false,
124
+ "rstrip": false,
125
+ "normalized": false,
126
+ "special": true
127
+ },
128
+ {
129
+ "id": 57529,
130
+ "content": "<s_business_name>",
131
+ "single_word": false,
132
+ "lstrip": false,
133
+ "rstrip": false,
134
+ "normalized": false,
135
+ "special": true
136
+ },
137
+ {
138
+ "id": 57530,
139
+ "content": "<s_multi_line_address>",
140
+ "single_word": false,
141
+ "lstrip": false,
142
+ "rstrip": false,
143
+ "normalized": false,
144
+ "special": true
145
+ },
146
+ {
147
+ "id": 57531,
148
+ "content": "</s_business_name>",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": false,
153
+ "special": true
154
+ },
155
+ {
156
+ "id": 57532,
157
+ "content": "<s_Exempt_FATCA_code>",
158
+ "single_word": false,
159
+ "lstrip": false,
160
+ "rstrip": false,
161
+ "normalized": false,
162
+ "special": true
163
+ },
164
+ {
165
+ "id": 57533,
166
+ "content": "<s_EI_number>",
167
+ "single_word": false,
168
+ "lstrip": false,
169
+ "rstrip": false,
170
+ "normalized": false,
171
+ "special": true
172
+ },
173
+ {
174
+ "id": 57534,
175
+ "content": "</s_ss_number>",
176
+ "single_word": false,
177
+ "lstrip": false,
178
+ "rstrip": false,
179
+ "normalized": false,
180
+ "special": true
181
+ },
182
+ {
183
+ "id": 57535,
184
+ "content": "</s_multi_line_address>",
185
+ "single_word": false,
186
+ "lstrip": false,
187
+ "rstrip": false,
188
+ "normalized": false,
189
+ "special": true
190
+ },
191
+ {
192
+ "id": 57536,
193
+ "content": "<s_date>",
194
+ "single_word": false,
195
+ "lstrip": false,
196
+ "rstrip": false,
197
+ "normalized": false,
198
+ "special": true
199
+ },
200
+ {
201
+ "id": 57537,
202
+ "content": "<s_ss_number>",
203
+ "single_word": false,
204
+ "lstrip": false,
205
+ "rstrip": false,
206
+ "normalized": false,
207
+ "special": true
208
+ },
209
+ {
210
+ "id": 57538,
211
+ "content": "</s_Exempt_pay_code>",
212
+ "single_word": false,
213
+ "lstrip": false,
214
+ "rstrip": false,
215
+ "normalized": false,
216
+ "special": true
217
+ },
218
+ {
219
+ "id": 57539,
220
+ "content": "</s_Exempt_FATCA_code>",
221
+ "single_word": false,
222
+ "lstrip": false,
223
+ "rstrip": false,
224
+ "normalized": false,
225
+ "special": true
226
+ },
227
+ {
228
+ "id": 57540,
229
+ "content": "<s_address>",
230
+ "single_word": false,
231
+ "lstrip": false,
232
+ "rstrip": false,
233
+ "normalized": false,
234
+ "special": true
235
+ },
236
+ {
237
+ "id": 57541,
238
+ "content": "<s_zip_address>",
239
+ "single_word": false,
240
+ "lstrip": false,
241
+ "rstrip": false,
242
+ "normalized": false,
243
+ "special": true
244
+ },
245
+ {
246
+ "id": 57542,
247
+ "content": "</s_zip_address>",
248
+ "single_word": false,
249
+ "lstrip": false,
250
+ "rstrip": false,
251
+ "normalized": false,
252
+ "special": true
253
+ },
254
+ {
255
+ "id": 57543,
256
+ "content": "<s_name>",
257
+ "single_word": false,
258
+ "lstrip": false,
259
+ "rstrip": false,
260
+ "normalized": false,
261
+ "special": true
262
+ },
263
+ {
264
+ "id": 57544,
265
+ "content": "<s_Exempt_pay_code>",
266
+ "single_word": false,
267
+ "lstrip": false,
268
+ "rstrip": false,
269
+ "normalized": false,
270
+ "special": true
271
+ },
272
+ {
273
+ "id": 57545,
274
+ "content": "</s_EI_number>",
275
+ "single_word": false,
276
+ "lstrip": false,
277
+ "rstrip": false,
278
+ "normalized": false,
279
+ "special": true
280
+ },
281
+ {
282
+ "id": 57546,
283
+ "content": "<s_account_number>",
284
+ "single_word": false,
285
+ "lstrip": false,
286
+ "rstrip": false,
287
+ "normalized": false,
288
+ "special": true
289
  }
290
  ],
291
  "normalizer": {
tokenizer_config.json CHANGED
@@ -63,11 +63,209 @@
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  "</s>",
70
- "<s>"
 
 
71
  ],
72
  "bos_token": "<s>",
73
  "clean_up_tokenization_spaces": true,
 
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
66
+ },
67
+ "57525": {
68
+ "content": "</s_date>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "57526": {
76
+ "content": "</s_address>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "57527": {
84
+ "content": "</s_name>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "57528": {
92
+ "content": "</s_account_number>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "57529": {
100
+ "content": "<s_business_name>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "57530": {
108
+ "content": "<s_multi_line_address>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "57531": {
116
+ "content": "</s_business_name>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "57532": {
124
+ "content": "<s_Exempt_FATCA_code>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "57533": {
132
+ "content": "<s_EI_number>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "57534": {
140
+ "content": "</s_ss_number>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "57535": {
148
+ "content": "</s_multi_line_address>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "57536": {
156
+ "content": "<s_date>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "57537": {
164
+ "content": "<s_ss_number>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "57538": {
172
+ "content": "</s_Exempt_pay_code>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "57539": {
180
+ "content": "</s_Exempt_FATCA_code>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "57540": {
188
+ "content": "<s_address>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "57541": {
196
+ "content": "<s_zip_address>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "57542": {
204
+ "content": "</s_zip_address>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "57543": {
212
+ "content": "<s_name>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "57544": {
220
+ "content": "<s_Exempt_pay_code>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "57545": {
228
+ "content": "</s_EI_number>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "57546": {
236
+ "content": "<s_account_number>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
  }
243
  },
244
  "additional_special_tokens": [
245
+ "</s_date>",
246
+ "</s_address>",
247
+ "</s_name>",
248
+ "</s_account_number>",
249
+ "<s_business_name>",
250
+ "<s_multi_line_address>",
251
+ "</s_business_name>",
252
+ "<s_Exempt_FATCA_code>",
253
+ "<s_EI_number>",
254
+ "</s_ss_number>",
255
+ "<s>",
256
+ "</s_multi_line_address>",
257
+ "<s_date>",
258
+ "<s_ss_number>",
259
+ "</s_Exempt_pay_code>",
260
+ "</s_Exempt_FATCA_code>",
261
+ "<s_address>",
262
+ "<s_zip_address>",
263
+ "</s_zip_address>",
264
+ "<s_name>",
265
  "</s>",
266
+ "<s_Exempt_pay_code>",
267
+ "</s_EI_number>",
268
+ "<s_account_number>"
269
  ],
270
  "bos_token": "<s>",
271
  "clean_up_tokenization_spaces": true,