FlewRr committed on
Commit
c36bd87
·
verified ·
1 Parent(s): f800a4d

Training in progress, step 1000

Browse files
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "microsoft/trocr-base-handwritten",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
6
  "decoder": {
7
  "_name_or_path": "",
8
  "activation_dropout": 0.0,
9
- "activation_function": "gelu",
10
  "add_cross_attention": true,
11
  "architectures": null,
12
  "attention_dropout": 0.0,
@@ -15,12 +15,12 @@
15
  "bos_token_id": 0,
16
  "chunk_size_feed_forward": 0,
17
  "classifier_dropout": 0.0,
18
- "cross_attention_hidden_size": 768,
19
- "d_model": 1024,
20
- "decoder_attention_heads": 16,
21
- "decoder_ffn_dim": 4096,
22
  "decoder_layerdrop": 0.0,
23
- "decoder_layers": 12,
24
  "decoder_start_token_id": 2,
25
  "diversity_penalty": 0.0,
26
  "do_sample": false,
@@ -64,14 +64,14 @@
64
  "repetition_penalty": 1.0,
65
  "return_dict": true,
66
  "return_dict_in_generate": false,
67
- "scale_embedding": false,
68
  "sep_token_id": null,
69
  "suppress_tokens": null,
70
  "task_specific_params": null,
71
  "temperature": 1.0,
72
  "tf_legacy_loss": false,
73
  "tie_encoder_decoder": false,
74
- "tie_word_embeddings": true,
75
  "tokenizer_class": null,
76
  "top_k": 50,
77
  "top_p": 1.0,
@@ -81,7 +81,7 @@
81
  "use_bfloat16": false,
82
  "use_cache": false,
83
  "use_learned_position_embeddings": true,
84
- "vocab_size": 50265
85
  },
86
  "decoder_start_token_id": 0,
87
  "early_stopping": true,
@@ -108,14 +108,14 @@
108
  "forced_eos_token_id": null,
109
  "hidden_act": "gelu",
110
  "hidden_dropout_prob": 0.0,
111
- "hidden_size": 768,
112
  "id2label": {
113
  "0": "LABEL_0",
114
  "1": "LABEL_1"
115
  },
116
  "image_size": 384,
117
  "initializer_range": 0.02,
118
- "intermediate_size": 3072,
119
  "is_decoder": false,
120
  "is_encoder_decoder": false,
121
  "label2id": {
@@ -126,9 +126,9 @@
126
  "length_penalty": 1.0,
127
  "max_length": 20,
128
  "min_length": 0,
129
- "model_type": "vit",
130
  "no_repeat_ngram_size": 0,
131
- "num_attention_heads": 12,
132
  "num_beam_groups": 1,
133
  "num_beams": 1,
134
  "num_channels": 3,
@@ -142,7 +142,7 @@
142
  "prefix": null,
143
  "problem_type": null,
144
  "pruned_heads": {},
145
- "qkv_bias": false,
146
  "remove_invalid_values": false,
147
  "repetition_penalty": 1.0,
148
  "return_dict": true,
@@ -170,9 +170,8 @@
170
  "no_repeat_ngram_size": 3,
171
  "num_beams": 4,
172
  "pad_token_id": 1,
173
- "processor_class": "TrOCRProcessor",
174
  "tie_word_embeddings": false,
175
  "torch_dtype": "float32",
176
  "transformers_version": "4.44.2",
177
- "vocab_size": 50265
178
  }
 
1
  {
2
+ "_name_or_path": "microsoft/trocr-small-stage1",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
6
  "decoder": {
7
  "_name_or_path": "",
8
  "activation_dropout": 0.0,
9
+ "activation_function": "relu",
10
  "add_cross_attention": true,
11
  "architectures": null,
12
  "attention_dropout": 0.0,
 
15
  "bos_token_id": 0,
16
  "chunk_size_feed_forward": 0,
17
  "classifier_dropout": 0.0,
18
+ "cross_attention_hidden_size": 384,
19
+ "d_model": 256,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 1024,
22
  "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
  "decoder_start_token_id": 2,
25
  "diversity_penalty": 0.0,
26
  "do_sample": false,
 
64
  "repetition_penalty": 1.0,
65
  "return_dict": true,
66
  "return_dict_in_generate": false,
67
+ "scale_embedding": true,
68
  "sep_token_id": null,
69
  "suppress_tokens": null,
70
  "task_specific_params": null,
71
  "temperature": 1.0,
72
  "tf_legacy_loss": false,
73
  "tie_encoder_decoder": false,
74
+ "tie_word_embeddings": false,
75
  "tokenizer_class": null,
76
  "top_k": 50,
77
  "top_p": 1.0,
 
81
  "use_bfloat16": false,
82
  "use_cache": false,
83
  "use_learned_position_embeddings": true,
84
+ "vocab_size": 64044
85
  },
86
  "decoder_start_token_id": 0,
87
  "early_stopping": true,
 
108
  "forced_eos_token_id": null,
109
  "hidden_act": "gelu",
110
  "hidden_dropout_prob": 0.0,
111
+ "hidden_size": 384,
112
  "id2label": {
113
  "0": "LABEL_0",
114
  "1": "LABEL_1"
115
  },
116
  "image_size": 384,
117
  "initializer_range": 0.02,
118
+ "intermediate_size": 1536,
119
  "is_decoder": false,
120
  "is_encoder_decoder": false,
121
  "label2id": {
 
126
  "length_penalty": 1.0,
127
  "max_length": 20,
128
  "min_length": 0,
129
+ "model_type": "deit",
130
  "no_repeat_ngram_size": 0,
131
+ "num_attention_heads": 6,
132
  "num_beam_groups": 1,
133
  "num_beams": 1,
134
  "num_channels": 3,
 
142
  "prefix": null,
143
  "problem_type": null,
144
  "pruned_heads": {},
145
+ "qkv_bias": true,
146
  "remove_invalid_values": false,
147
  "repetition_penalty": 1.0,
148
  "return_dict": true,
 
170
  "no_repeat_ngram_size": 3,
171
  "num_beams": 4,
172
  "pad_token_id": 1,
 
173
  "tie_word_embeddings": false,
174
  "torch_dtype": "float32",
175
  "transformers_version": "4.44.2",
176
+ "vocab_size": 64044
177
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0738ecfb4d730a4a7c78d45065de43c2bda86533a37cb76f41a51f426cb18823
3
- size 1335747032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9463f99a60406b70fd45948d2c81b5a8814e65a7c2a706578327a2aa4805eb8c
3
+ size 246430696
preprocessor_config.json CHANGED
@@ -1,4 +1,9 @@
1
  {
 
 
 
 
 
2
  "do_normalize": true,
3
  "do_rescale": true,
4
  "do_resize": true,
@@ -7,13 +12,13 @@
7
  0.5,
8
  0.5
9
  ],
10
- "image_processor_type": "ViTImageProcessor",
11
  "image_std": [
12
  0.5,
13
  0.5,
14
  0.5
15
  ],
16
- "resample": 2,
17
  "rescale_factor": 0.00392156862745098,
18
  "size": {
19
  "height": 384,
 
1
  {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": false,
7
  "do_normalize": true,
8
  "do_rescale": true,
9
  "do_resize": true,
 
12
  0.5,
13
  0.5
14
  ],
15
+ "image_processor_type": "DeiTImageProcessor",
16
  "image_std": [
17
  0.5,
18
  0.5,
19
  0.5
20
  ],
21
+ "resample": 3,
22
  "rescale_factor": 0.00392156862745098,
23
  "size": {
24
  "height": 384,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b87537a4c40bb212ce66b4f591484fbb6a149bc9b1b59349066cc5b0a6ecd600
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd15a9a7ba5a4eebc5265a10d9ea1e6271df6fe100f065d0639401e31bac86a
3
  size 5304