TeddyUW committed on
Commit
aa480aa
·
1 Parent(s): bbaf214

Upload converted FAST base checkpoint

Browse files
converted_fast_base/config.json ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "FastForSceneTextRecognition"
4
+ ],
5
+ "backbone_config": {
6
+ "batch_norm_eps": 1e-05,
7
+ "conv_layer_kernel_sizes": [
8
+ [
9
+ [
10
+ 3,
11
+ 3
12
+ ],
13
+ [
14
+ 3,
15
+ 3
16
+ ],
17
+ [
18
+ 3,
19
+ 1
20
+ ],
21
+ [
22
+ 3,
23
+ 3
24
+ ],
25
+ [
26
+ 3,
27
+ 1
28
+ ],
29
+ [
30
+ 3,
31
+ 3
32
+ ],
33
+ [
34
+ 3,
35
+ 3
36
+ ],
37
+ [
38
+ 1,
39
+ 3
40
+ ],
41
+ [
42
+ 3,
43
+ 3
44
+ ],
45
+ [
46
+ 3,
47
+ 3
48
+ ]
49
+ ],
50
+ [
51
+ [
52
+ 3,
53
+ 3
54
+ ],
55
+ [
56
+ 1,
57
+ 3
58
+ ],
59
+ [
60
+ 3,
61
+ 3
62
+ ],
63
+ [
64
+ 3,
65
+ 1
66
+ ],
67
+ [
68
+ 3,
69
+ 3
70
+ ],
71
+ [
72
+ 3,
73
+ 3
74
+ ],
75
+ [
76
+ 3,
77
+ 1
78
+ ],
79
+ [
80
+ 3,
81
+ 1
82
+ ],
83
+ [
84
+ 3,
85
+ 3
86
+ ],
87
+ [
88
+ 3,
89
+ 3
90
+ ]
91
+ ],
92
+ [
93
+ [
94
+ 3,
95
+ 3
96
+ ],
97
+ [
98
+ 3,
99
+ 3
100
+ ],
101
+ [
102
+ 3,
103
+ 3
104
+ ],
105
+ [
106
+ 1,
107
+ 3
108
+ ],
109
+ [
110
+ 3,
111
+ 3
112
+ ],
113
+ [
114
+ 3,
115
+ 1
116
+ ],
117
+ [
118
+ 3,
119
+ 3
120
+ ],
121
+ [
122
+ 3,
123
+ 1
124
+ ]
125
+ ],
126
+ [
127
+ [
128
+ 3,
129
+ 3
130
+ ],
131
+ [
132
+ 1,
133
+ 3
134
+ ],
135
+ [
136
+ 3,
137
+ 1
138
+ ],
139
+ [
140
+ 3,
141
+ 1
142
+ ],
143
+ [
144
+ 1,
145
+ 3
146
+ ]
147
+ ]
148
+ ],
149
+ "conv_layer_strides": [
150
+ [
151
+ 1,
152
+ 2,
153
+ 1,
154
+ 1,
155
+ 1,
156
+ 1,
157
+ 1,
158
+ 1,
159
+ 1,
160
+ 1
161
+ ],
162
+ [
163
+ 2,
164
+ 1,
165
+ 1,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 1,
170
+ 1,
171
+ 1,
172
+ 1
173
+ ],
174
+ [
175
+ 2,
176
+ 1,
177
+ 1,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 1,
182
+ 1
183
+ ],
184
+ [
185
+ 2,
186
+ 1,
187
+ 1,
188
+ 1,
189
+ 1
190
+ ]
191
+ ],
192
+ "depths": [
193
+ 10,
194
+ 10,
195
+ 8,
196
+ 5
197
+ ],
198
+ "hidden_sizes": [
199
+ 64,
200
+ 64,
201
+ 128,
202
+ 256,
203
+ 512
204
+ ],
205
+ "image_size": [
206
+ 640,
207
+ 640
208
+ ],
209
+ "initializer_range": 0.02,
210
+ "model_type": "textnet",
211
+ "out_features": [
212
+ "stage1",
213
+ "stage2",
214
+ "stage3",
215
+ "stage4"
216
+ ],
217
+ "out_indices": [
218
+ 1,
219
+ 2,
220
+ 3,
221
+ 4
222
+ ],
223
+ "stage_names": [
224
+ "stem",
225
+ "stage1",
226
+ "stage2",
227
+ "stage3",
228
+ "stage4"
229
+ ],
230
+ "stem_act_func": "relu",
231
+ "stem_kernel_size": 3,
232
+ "stem_num_channels": 3,
233
+ "stem_out_channels": 64,
234
+ "stem_stride": 2
235
+ },
236
+ "bounding_box_type": "boxes",
237
+ "head_conv_dilation": 1,
238
+ "head_conv_groups": 1,
239
+ "head_conv_in_channels": 512,
240
+ "head_conv_kernel_size": [
241
+ 3,
242
+ 3
243
+ ],
244
+ "head_conv_out_channels": 128,
245
+ "head_conv_stride": 1,
246
+ "head_dropout_ratio": 0.1,
247
+ "head_final_act_func": null,
248
+ "head_final_bias": false,
249
+ "head_final_dilation": 1,
250
+ "head_final_dropout_rate": 0,
251
+ "head_final_groups": 1,
252
+ "head_final_has_shuffle": false,
253
+ "head_final_in_channels": 128,
254
+ "head_final_kernel_size": 1,
255
+ "head_final_ops_order": "weight",
256
+ "head_final_out_channels": 5,
257
+ "head_final_stride": 1,
258
+ "head_final_use_bn": false,
259
+ "head_pooling_size": 9,
260
+ "initializer_range": 0.02,
261
+ "loss_bg": false,
262
+ "min_area": 250,
263
+ "neck_dilation": [
264
+ 1,
265
+ 1,
266
+ 1,
267
+ 1
268
+ ],
269
+ "neck_groups": [
270
+ 1,
271
+ 1,
272
+ 1,
273
+ 1
274
+ ],
275
+ "neck_in_channels": [
276
+ 64,
277
+ 128,
278
+ 256,
279
+ 512
280
+ ],
281
+ "neck_kernel_size": [
282
+ [
283
+ 3,
284
+ 3
285
+ ],
286
+ [
287
+ 3,
288
+ 3
289
+ ],
290
+ [
291
+ 3,
292
+ 3
293
+ ],
294
+ [
295
+ 3,
296
+ 3
297
+ ]
298
+ ],
299
+ "neck_out_channels": [
300
+ 128,
301
+ 128,
302
+ 128,
303
+ 128
304
+ ],
305
+ "neck_stride": [
306
+ 1,
307
+ 1,
308
+ 1,
309
+ 1
310
+ ],
311
+ "torch_dtype": "float32",
312
+ "transformers_version": "4.55.0.dev0",
313
+ "use_timm_backbone": false
314
+ }
converted_fast_base/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fe2bdd5c7c43e6b77a99046907f8f26fdccfe3112cacd1a14071ba8c48b2e4
3
+ size 65649136
converted_fast_base/preprocessor_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bounding_box_type": "boxes",
3
+ "crop_size": {
4
+ "height": 224,
5
+ "width": 224
6
+ },
7
+ "do_center_crop": false,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_rescale": true,
11
+ "do_resize": true,
12
+ "image_mean": [
13
+ 0.485,
14
+ 0.456,
15
+ 0.406
16
+ ],
17
+ "image_processor_type": "FastImageProcessor",
18
+ "image_std": [
19
+ 0.229,
20
+ 0.224,
21
+ 0.225
22
+ ],
23
+ "min_area": 250,
24
+ "pooling_size": 9,
25
+ "resample": 2,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "size": {
28
+ "shortest_edge": 640
29
+ },
30
+ "size_divisor": 32
31
+ }
converted_fast_base/textnet/config.json ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "TextNetBackbone"
4
+ ],
5
+ "batch_norm_eps": 1e-05,
6
+ "conv_layer_kernel_sizes": [
7
+ [
8
+ [
9
+ 3,
10
+ 3
11
+ ],
12
+ [
13
+ 3,
14
+ 3
15
+ ],
16
+ [
17
+ 3,
18
+ 1
19
+ ],
20
+ [
21
+ 3,
22
+ 3
23
+ ],
24
+ [
25
+ 3,
26
+ 1
27
+ ],
28
+ [
29
+ 3,
30
+ 3
31
+ ],
32
+ [
33
+ 3,
34
+ 3
35
+ ],
36
+ [
37
+ 1,
38
+ 3
39
+ ],
40
+ [
41
+ 3,
42
+ 3
43
+ ],
44
+ [
45
+ 3,
46
+ 3
47
+ ]
48
+ ],
49
+ [
50
+ [
51
+ 3,
52
+ 3
53
+ ],
54
+ [
55
+ 1,
56
+ 3
57
+ ],
58
+ [
59
+ 3,
60
+ 3
61
+ ],
62
+ [
63
+ 3,
64
+ 1
65
+ ],
66
+ [
67
+ 3,
68
+ 3
69
+ ],
70
+ [
71
+ 3,
72
+ 3
73
+ ],
74
+ [
75
+ 3,
76
+ 1
77
+ ],
78
+ [
79
+ 3,
80
+ 1
81
+ ],
82
+ [
83
+ 3,
84
+ 3
85
+ ],
86
+ [
87
+ 3,
88
+ 3
89
+ ]
90
+ ],
91
+ [
92
+ [
93
+ 3,
94
+ 3
95
+ ],
96
+ [
97
+ 3,
98
+ 3
99
+ ],
100
+ [
101
+ 3,
102
+ 3
103
+ ],
104
+ [
105
+ 1,
106
+ 3
107
+ ],
108
+ [
109
+ 3,
110
+ 3
111
+ ],
112
+ [
113
+ 3,
114
+ 1
115
+ ],
116
+ [
117
+ 3,
118
+ 3
119
+ ],
120
+ [
121
+ 3,
122
+ 1
123
+ ]
124
+ ],
125
+ [
126
+ [
127
+ 3,
128
+ 3
129
+ ],
130
+ [
131
+ 1,
132
+ 3
133
+ ],
134
+ [
135
+ 3,
136
+ 1
137
+ ],
138
+ [
139
+ 3,
140
+ 1
141
+ ],
142
+ [
143
+ 1,
144
+ 3
145
+ ]
146
+ ]
147
+ ],
148
+ "conv_layer_strides": [
149
+ [
150
+ 1,
151
+ 2,
152
+ 1,
153
+ 1,
154
+ 1,
155
+ 1,
156
+ 1,
157
+ 1,
158
+ 1,
159
+ 1
160
+ ],
161
+ [
162
+ 2,
163
+ 1,
164
+ 1,
165
+ 1,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 1,
170
+ 1,
171
+ 1
172
+ ],
173
+ [
174
+ 2,
175
+ 1,
176
+ 1,
177
+ 1,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 1
182
+ ],
183
+ [
184
+ 2,
185
+ 1,
186
+ 1,
187
+ 1,
188
+ 1
189
+ ]
190
+ ],
191
+ "depths": [
192
+ 10,
193
+ 10,
194
+ 8,
195
+ 5
196
+ ],
197
+ "hidden_sizes": [
198
+ 64,
199
+ 64,
200
+ 128,
201
+ 256,
202
+ 512
203
+ ],
204
+ "image_size": [
205
+ 640,
206
+ 640
207
+ ],
208
+ "initializer_range": 0.02,
209
+ "model_type": "textnet",
210
+ "out_features": [
211
+ "stage1",
212
+ "stage2",
213
+ "stage3",
214
+ "stage4"
215
+ ],
216
+ "out_indices": [
217
+ 1,
218
+ 2,
219
+ 3,
220
+ 4
221
+ ],
222
+ "stage_names": [
223
+ "stem",
224
+ "stage1",
225
+ "stage2",
226
+ "stage3",
227
+ "stage4"
228
+ ],
229
+ "stem_act_func": "relu",
230
+ "stem_kernel_size": 3,
231
+ "stem_num_channels": 3,
232
+ "stem_out_channels": 64,
233
+ "stem_stride": 2,
234
+ "torch_dtype": "float32",
235
+ "transformers_version": "4.55.0.dev0"
236
+ }
converted_fast_base/textnet/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d414e7a89a7709dbc14de450ad52dadc9796ff40b9b74540066132a4410fe724
3
+ size 54291592