Raghavan committed on
Commit
5d59e82
·
1 Parent(s): b727de5

Upload 4 files

Browse files
Files changed (2) hide show
  1. config.json +273 -178
  2. model.safetensors +2 -2
config.json CHANGED
@@ -2,202 +2,294 @@
2
  "architectures": [
3
  "FastForSceneTextRecognition"
4
  ],
5
- "backbone_act_func": "relu",
6
- "backbone_bias": false,
7
- "backbone_dilation": 1,
8
- "backbone_dropout_rate": 0,
9
- "backbone_groups": 1,
10
- "backbone_has_shuffle": false,
11
- "backbone_in_channels": 3,
12
- "backbone_kernel_size": 3,
13
- "backbone_ops_order": "weight_bn_act",
14
- "backbone_out_channels": 64,
15
- "backbone_stage1_dilation": [
16
- 1,
17
- 1,
18
- 1
19
- ],
20
- "backbone_stage1_groups": [
21
- 1,
22
- 1,
23
- 1
24
- ],
25
- "backbone_stage1_in_channels": [
26
- 64,
27
- 64,
28
- 64
29
- ],
30
- "backbone_stage1_kernel_size": [
31
- [
32
  3,
33
- 3
 
 
34
  ],
35
- [
36
- 3,
37
- 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ],
39
- [
40
- 3,
41
- 3
42
- ]
43
- ],
44
- "backbone_stage1_out_channels": [
45
- 64,
46
- 64,
47
- 64
48
- ],
49
- "backbone_stage1_stride": [
50
- 1,
51
- 2,
52
- 1
53
- ],
54
- "backbone_stage2_dilation": [
55
- 1,
56
- 1,
57
- 1,
58
- 1
59
- ],
60
- "backbone_stage2_groups": [
61
- 1,
62
- 1,
63
- 1,
64
- 1
65
- ],
66
- "backbone_stage2_in_channels": [
67
- 64,
68
- 128,
69
- 128,
70
- 128
71
- ],
72
- "backbone_stage2_kernel_size": [
73
- [
74
- 3,
75
- 3
76
  ],
77
- [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  1,
79
- 3
 
80
  ],
81
- [
82
- 3,
83
- 3
 
84
  ],
85
- [
86
- 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  1
88
- ]
89
- ],
90
- "backbone_stage2_out_channels": [
91
- 128,
92
- 128,
93
- 128,
94
- 128
95
- ],
96
- "backbone_stage2_stride": [
97
- 2,
98
- 1,
99
- 1,
100
- 1
101
- ],
102
- "backbone_stage3_dilation": [
103
- 1,
104
- 1,
105
- 1,
106
- 1
107
- ],
108
- "backbone_stage3_groups": [
109
- 1,
110
- 1,
111
- 1,
112
- 1
113
- ],
114
- "backbone_stage3_in_channels": [
115
- 128,
116
- 256,
117
- 256,
118
- 256
119
- ],
120
- "backbone_stage3_kernel_size": [
121
- [
122
- 3,
123
- 3
124
  ],
125
- [
126
- 3,
127
- 3
 
 
128
  ],
129
- [
130
- 3,
 
 
131
  1
132
  ],
133
- [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  1,
135
- 3
136
- ]
137
- ],
138
- "backbone_stage3_out_channels": [
139
- 256,
140
- 256,
141
- 256,
142
- 256
143
- ],
144
- "backbone_stage3_stride": [
145
- 2,
146
- 1,
147
- 1,
148
- 1
149
- ],
150
- "backbone_stage4_dilation": [
151
- 1,
152
- 1,
153
- 1,
154
- 1
155
- ],
156
- "backbone_stage4_groups": [
157
- 1,
158
- 1,
159
- 1,
160
- 1
161
- ],
162
- "backbone_stage4_in_channels": [
163
- 256,
164
- 512,
165
- 512,
166
- 512
167
- ],
168
- "backbone_stage4_kernel_size": [
169
- [
170
- 3,
171
- 3
172
  ],
173
- [
174
- 3,
 
 
175
  1
176
  ],
177
- [
178
  1,
179
- 3
 
 
180
  ],
181
- [
182
- 3,
183
- 3
184
- ]
185
- ],
186
- "backbone_stage4_out_channels": [
187
- 512,
188
- 512,
189
- 512,
190
- 512
191
- ],
192
- "backbone_stage4_stride": [
193
- 2,
194
- 1,
195
- 1,
196
- 1
197
- ],
198
- "backbone_stride": 2,
199
- "backbone_use_bn": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  "bbox_type": "rect",
 
201
  "head_conv_dilation": 1,
202
  "head_conv_groups": 1,
203
  "head_conv_in_channels": 512,
@@ -272,6 +364,9 @@
272
  1,
273
  1
274
  ],
 
275
  "torch_dtype": "float32",
276
- "transformers_version": "4.35.0.dev0"
 
 
277
  }
 
2
  "architectures": [
3
  "FastForSceneTextRecognition"
4
  ],
5
+ "backbone": null,
6
+ "backbone_config": {
7
+ "_name_or_path": "",
8
+ "act_func": "relu",
9
+ "add_cross_attention": false,
10
+ "architectures": null,
11
+ "bad_words_ids": null,
12
+ "begin_suppress_tokens": null,
13
+ "bias": false,
14
+ "bos_token_id": null,
15
+ "chunk_size_feed_forward": 0,
16
+ "cross_attention_hidden_size": null,
17
+ "decoder_start_token_id": null,
18
+ "depths": [
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  3,
20
+ 4,
21
+ 4,
22
+ 4
23
  ],
24
+ "dilation": 1,
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "dropout_rate": 0,
28
+ "early_stopping": false,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "eos_token_id": null,
31
+ "exponential_decay_length_penalty": null,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "groups": 1,
36
+ "has_shuffle": false,
37
+ "hidden_sizes": [
38
+ 64,
39
+ 64,
40
+ 128,
41
+ 256,
42
+ 512
43
  ],
44
+ "id2label": {
45
+ "0": "LABEL_0",
46
+ "1": "LABEL_1"
47
+ },
48
+ "in_channels": 3,
49
+ "initializer_range": 0.02,
50
+ "is_decoder": false,
51
+ "is_encoder_decoder": false,
52
+ "kernel_size": 3,
53
+ "label2id": {
54
+ "LABEL_0": 0,
55
+ "LABEL_1": 1
56
+ },
57
+ "length_penalty": 1.0,
58
+ "max_length": 20,
59
+ "min_length": 0,
60
+ "model_type": "textnet",
61
+ "no_repeat_ngram_size": 0,
62
+ "num_beam_groups": 1,
63
+ "num_beams": 1,
64
+ "num_return_sequences": 1,
65
+ "ops_order": "weight_bn_act",
66
+ "out_channels": 64,
67
+ "out_features": [
68
+ "stage4"
 
 
 
 
 
 
 
 
 
 
 
 
69
  ],
70
+ "out_indices": [
71
+ 4
72
+ ],
73
+ "output_attentions": false,
74
+ "output_hidden_states": false,
75
+ "output_scores": false,
76
+ "pad_token_id": null,
77
+ "prefix": null,
78
+ "problem_type": null,
79
+ "pruned_heads": {},
80
+ "remove_invalid_values": false,
81
+ "repetition_penalty": 1.0,
82
+ "return_dict": true,
83
+ "return_dict_in_generate": false,
84
+ "sep_token_id": null,
85
+ "stage1_dilation": [
86
  1,
87
+ 1,
88
+ 1
89
  ],
90
+ "stage1_groups": [
91
+ 1,
92
+ 1,
93
+ 1
94
  ],
95
+ "stage1_in_channels": [
96
+ 64,
97
+ 64,
98
+ 64
99
+ ],
100
+ "stage1_kernel_size": [
101
+ [
102
+ 3,
103
+ 3
104
+ ],
105
+ [
106
+ 3,
107
+ 3
108
+ ],
109
+ [
110
+ 3,
111
+ 3
112
+ ]
113
+ ],
114
+ "stage1_out_channels": [
115
+ 64,
116
+ 64,
117
+ 64
118
+ ],
119
+ "stage1_stride": [
120
+ 1,
121
+ 2,
122
  1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  ],
124
+ "stage2_dilation": [
125
+ 1,
126
+ 1,
127
+ 1,
128
+ 1
129
  ],
130
+ "stage2_groups": [
131
+ 1,
132
+ 1,
133
+ 1,
134
  1
135
  ],
136
+ "stage2_in_channels": [
137
+ 64,
138
+ 128,
139
+ 128,
140
+ 128
141
+ ],
142
+ "stage2_kernel_size": [
143
+ [
144
+ 3,
145
+ 3
146
+ ],
147
+ [
148
+ 1,
149
+ 3
150
+ ],
151
+ [
152
+ 3,
153
+ 3
154
+ ],
155
+ [
156
+ 3,
157
+ 1
158
+ ]
159
+ ],
160
+ "stage2_out_channels": [
161
+ 128,
162
+ 128,
163
+ 128,
164
+ 128
165
+ ],
166
+ "stage2_stride": [
167
+ 2,
168
  1,
169
+ 1,
170
+ 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  ],
172
+ "stage3_dilation": [
173
+ 1,
174
+ 1,
175
+ 1,
176
  1
177
  ],
178
+ "stage3_groups": [
179
  1,
180
+ 1,
181
+ 1,
182
+ 1
183
  ],
184
+ "stage3_in_channels": [
185
+ 128,
186
+ 256,
187
+ 256,
188
+ 256
189
+ ],
190
+ "stage3_kernel_size": [
191
+ [
192
+ 3,
193
+ 3
194
+ ],
195
+ [
196
+ 3,
197
+ 3
198
+ ],
199
+ [
200
+ 3,
201
+ 1
202
+ ],
203
+ [
204
+ 1,
205
+ 3
206
+ ]
207
+ ],
208
+ "stage3_out_channels": [
209
+ 256,
210
+ 256,
211
+ 256,
212
+ 256
213
+ ],
214
+ "stage3_stride": [
215
+ 2,
216
+ 1,
217
+ 1,
218
+ 1
219
+ ],
220
+ "stage4_dilation": [
221
+ 1,
222
+ 1,
223
+ 1,
224
+ 1
225
+ ],
226
+ "stage4_groups": [
227
+ 1,
228
+ 1,
229
+ 1,
230
+ 1
231
+ ],
232
+ "stage4_in_channels": [
233
+ 256,
234
+ 512,
235
+ 512,
236
+ 512
237
+ ],
238
+ "stage4_kernel_size": [
239
+ [
240
+ 3,
241
+ 3
242
+ ],
243
+ [
244
+ 3,
245
+ 1
246
+ ],
247
+ [
248
+ 1,
249
+ 3
250
+ ],
251
+ [
252
+ 3,
253
+ 3
254
+ ]
255
+ ],
256
+ "stage4_out_channels": [
257
+ 512,
258
+ 512,
259
+ 512,
260
+ 512
261
+ ],
262
+ "stage4_stride": [
263
+ 2,
264
+ 1,
265
+ 1,
266
+ 1
267
+ ],
268
+ "stage_names": [
269
+ "stem",
270
+ "stage1",
271
+ "stage2",
272
+ "stage3",
273
+ "stage4"
274
+ ],
275
+ "stride": 2,
276
+ "suppress_tokens": null,
277
+ "task_specific_params": null,
278
+ "temperature": 1.0,
279
+ "tf_legacy_loss": false,
280
+ "tie_encoder_decoder": false,
281
+ "tie_word_embeddings": true,
282
+ "tokenizer_class": null,
283
+ "top_k": 50,
284
+ "top_p": 1.0,
285
+ "torch_dtype": null,
286
+ "torchscript": false,
287
+ "typical_p": 1.0,
288
+ "use_bfloat16": false,
289
+ "use_bn": true
290
+ },
291
  "bbox_type": "rect",
292
+ "dilation": null,
293
  "head_conv_dilation": 1,
294
  "head_conv_groups": 1,
295
  "head_conv_in_channels": 512,
 
364
  1,
365
  1
366
  ],
367
+ "num_channels": 3,
368
  "torch_dtype": "float32",
369
+ "transformers_version": "4.35.0.dev0",
370
+ "use_pretrained_backbone": null,
371
+ "use_timm_backbone": false
372
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd629ea5f65d50d52fe8390961e50e91b4956d3317aef1cb96d421bf200684f0
3
- size 54302648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e25393d1a31a8b57dbb218f81b0b930fc7de741da73d20e31e8afc10dd20ee
3
+ size 54305008