steveroy committed on
Commit
cc60e81
·
1 Parent(s): 73b4cb5

Training done

Browse files
added_tokens.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "</s_invoice_no>": 57525,
3
- "</s_po_no>": 57527,
4
- "<s_cord>": 57529,
5
  "<s_iitcdip>": 57523,
6
- "<s_invoice_no>": 57526,
7
- "<s_po_no>": 57528,
 
8
  "<s_synthdog>": 57524,
9
  "<sep/>": 57522
10
  }
 
1
  {
2
+ "</s_invoice_no>": 57526,
3
+ "</s_po_no>": 57528,
 
4
  "<s_iitcdip>": 57523,
5
+ "<s_invoice>": 57529,
6
+ "<s_invoice_no>": 57525,
7
+ "<s_po_no>": 57527,
8
  "<s_synthdog>": 57524,
9
  "<sep/>": 57522
10
  }
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_align_long_axis": false,
3
+ "do_normalize": true,
4
+ "do_pad": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "do_thumbnail": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "DonutImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "DonutProcessor",
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 960,
24
+ 1280
25
+ ]
26
+ }
special_tokens_map.json CHANGED
@@ -1,25 +1,11 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
@@ -27,25 +13,7 @@
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<s_iitcdip>",
4
+ "<s_synthdog>"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -43,10 +43,10 @@
43
  "57522": {
44
  "content": "<sep/>",
45
  "lstrip": false,
46
- "normalized": false,
47
  "rstrip": false,
48
  "single_word": false,
49
- "special": true
50
  },
51
  "57523": {
52
  "content": "<s_iitcdip>",
@@ -65,52 +65,54 @@
65
  "special": true
66
  },
67
  "57525": {
68
- "content": "</s_invoice_no>",
69
  "lstrip": false,
70
- "normalized": false,
71
  "rstrip": false,
72
  "single_word": false,
73
- "special": true
74
  },
75
  "57526": {
76
- "content": "<s_invoice_no>",
77
  "lstrip": false,
78
- "normalized": false,
79
  "rstrip": false,
80
  "single_word": false,
81
- "special": true
82
  },
83
  "57527": {
84
- "content": "</s_po_no>",
85
  "lstrip": false,
86
- "normalized": false,
87
  "rstrip": false,
88
  "single_word": false,
89
- "special": true
90
  },
91
  "57528": {
92
- "content": "<s_po_no>",
93
  "lstrip": false,
94
- "normalized": false,
95
  "rstrip": false,
96
  "single_word": false,
97
- "special": true
98
  },
99
  "57529": {
100
- "content": "<s_cord>",
101
  "lstrip": false,
102
- "normalized": false,
103
  "rstrip": false,
104
  "single_word": false,
105
- "special": true
106
  }
107
  },
108
- "additional_special_tokens": [],
 
 
 
109
  "bos_token": "<s>",
110
  "clean_up_tokenization_spaces": true,
111
  "cls_token": "<s>",
112
  "eos_token": "</s>",
113
- "from_slow": true,
114
  "mask_token": "<mask>",
115
  "model_max_length": 1000000000000000019884624838656,
116
  "pad_token": "<pad>",
 
43
  "57522": {
44
  "content": "<sep/>",
45
  "lstrip": false,
46
+ "normalized": true,
47
  "rstrip": false,
48
  "single_word": false,
49
+ "special": false
50
  },
51
  "57523": {
52
  "content": "<s_iitcdip>",
 
65
  "special": true
66
  },
67
  "57525": {
68
+ "content": "<s_invoice_no>",
69
  "lstrip": false,
70
+ "normalized": true,
71
  "rstrip": false,
72
  "single_word": false,
73
+ "special": false
74
  },
75
  "57526": {
76
+ "content": "</s_invoice_no>",
77
  "lstrip": false,
78
+ "normalized": true,
79
  "rstrip": false,
80
  "single_word": false,
81
+ "special": false
82
  },
83
  "57527": {
84
+ "content": "<s_po_no>",
85
  "lstrip": false,
86
+ "normalized": true,
87
  "rstrip": false,
88
  "single_word": false,
89
+ "special": false
90
  },
91
  "57528": {
92
+ "content": "</s_po_no>",
93
  "lstrip": false,
94
+ "normalized": true,
95
  "rstrip": false,
96
  "single_word": false,
97
+ "special": false
98
  },
99
  "57529": {
100
+ "content": "<s_invoice>",
101
  "lstrip": false,
102
+ "normalized": true,
103
  "rstrip": false,
104
  "single_word": false,
105
+ "special": false
106
  }
107
  },
108
+ "additional_special_tokens": [
109
+ "<s_iitcdip>",
110
+ "<s_synthdog>"
111
+ ],
112
  "bos_token": "<s>",
113
  "clean_up_tokenization_spaces": true,
114
  "cls_token": "<s>",
115
  "eos_token": "</s>",
 
116
  "mask_token": "<mask>",
117
  "model_max_length": 1000000000000000019884624838656,
118
  "pad_token": "<pad>",