594zyc committed on
Commit
4a86f1a
·
verified ·
1 Parent(s): 716ec67

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +421 -0
config.yaml ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST: []
14
+ TRAIN:
15
+ - coco_lvis_no_part_train
16
+ - coco_lvis_paco_part_train
17
+ - ppp_train
18
+ - ppp_val
19
+ - entity_train_lr_with_cls
20
+ - entity_train_lr_no_cls
21
+ - textocr_train
22
+ - mhp_train
23
+ GLOBAL:
24
+ HACK: 1.0
25
+ INPUT:
26
+ COLOR_AUG_SSD: false
27
+ CROP:
28
+ ENABLED: false
29
+ SINGLE_CATEGORY_MAX_AREA: 1.0
30
+ SIZE:
31
+ - 0.9
32
+ - 0.9
33
+ TYPE: relative_range
34
+ DATASET_MAPPER_NAME: coco_panoptic_lsj
35
+ FORMAT: RGB
36
+ IMAGE_SIZE: 1024
37
+ MASK_FORMAT: polygon
38
+ MAX_SCALE: 2.0
39
+ MAX_SIZE_TEST: 1333
40
+ MAX_SIZE_TRAIN: 1333
41
+ MIN_SCALE: 0.1
42
+ MIN_SIZE_TEST: 800
43
+ MIN_SIZE_TRAIN:
44
+ - 800
45
+ MIN_SIZE_TRAIN_SAMPLING: choice
46
+ RANDOM_FLIP: horizontal
47
+ SIZE_DIVISIBILITY: -1
48
+ MODEL:
49
+ ANCHOR_GENERATOR:
50
+ ANGLES:
51
+ - - -90
52
+ - 0
53
+ - 90
54
+ ASPECT_RATIOS:
55
+ - - 0.5
56
+ - 1.0
57
+ - 2.0
58
+ NAME: DefaultAnchorGenerator
59
+ OFFSET: 0.0
60
+ SIZES:
61
+ - - 32
62
+ - 64
63
+ - 128
64
+ - 256
65
+ - 512
66
+ BACKBONE:
67
+ FREEZE_AT: 0
68
+ NAME: D2SwinTransformer
69
+ DEVICE: cuda
70
+ FPN:
71
+ FUSE_TYPE: sum
72
+ IN_FEATURES: []
73
+ NORM: ''
74
+ OUT_CHANNELS: 256
75
+ KEYPOINT_ON: false
76
+ LOAD_PROPOSALS: false
77
+ MASK_FORMER:
78
+ CLASS_WEIGHT: 0.0
79
+ CLIP: ViT-L-14-336/openai
80
+ DEC_LAYERS: 10
81
+ DEEP_SUPERVISION: true
82
+ DICE_WEIGHT: 5.0
83
+ DIM_FEEDFORWARD: 2048
84
+ DROPOUT: 0.0
85
+ ENC_LAYERS: 0
86
+ ENFORCE_INPUT_PROJ: false
87
+ HIDDEN_DIM: 256
88
+ IMPORTANCE_SAMPLE_RATIO: 0.75
89
+ MASK_WEIGHT: 5.0
90
+ NHEADS: 8
91
+ NO_OBJECT_WEIGHT: 0.1
92
+ NUM_OBJECT_QUERIES: 200
93
+ NUM_PART_QUERIES: 50
94
+ NUM_TEXT_QUERIES: 50
95
+ OVERSAMPLE_RATIO: 3.0
96
+ PRE_NORM: false
97
+ SEM_EMBED_DIM: 768
98
+ SIZE_DIVISIBILITY: 32
99
+ TEST:
100
+ INSTANCE_ON: true
101
+ OBJECT_MASK_THRESHOLD: 0.8
102
+ OVERLAP_THRESHOLD: 0.8
103
+ PANOPTIC_ON: true
104
+ SEMANTIC_ON: true
105
+ SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
106
+ TRAIN_NUM_POINTS: 12544
107
+ TRANSFORMER_DECODER_NAME: MultiScaleMaskedTransformerDecoderPlus
108
+ TRANSFORMER_IN_FEATURE: multi_scale_pixel_decoder
109
+ MASK_ON: false
110
+ META_ARCHITECTURE: MaskFormerPlusNoSem
111
+ MULTI_DATASET_TRAIN: true
112
+ PANOPTIC_FPN:
113
+ COMBINE:
114
+ ENABLED: true
115
+ INSTANCES_CONFIDENCE_THRESH: 0.5
116
+ OVERLAP_THRESH: 0.5
117
+ STUFF_AREA_LIMIT: 4096
118
+ INSTANCE_LOSS_WEIGHT: 1.0
119
+ PIXEL_MEAN:
120
+ - 123.675
121
+ - 116.28
122
+ - 103.53
123
+ PIXEL_STD:
124
+ - 58.395
125
+ - 57.12
126
+ - 57.375
127
+ PROPOSAL_GENERATOR:
128
+ MIN_SIZE: 0
129
+ NAME: RPN
130
+ RESNETS:
131
+ DEFORM_MODULATED: false
132
+ DEFORM_NUM_GROUPS: 1
133
+ DEFORM_ON_PER_STAGE:
134
+ - false
135
+ - false
136
+ - false
137
+ - false
138
+ DEPTH: 50
139
+ NORM: FrozenBN
140
+ NUM_GROUPS: 1
141
+ OUT_FEATURES:
142
+ - res2
143
+ - res3
144
+ - res4
145
+ - res5
146
+ RES2_OUT_CHANNELS: 256
147
+ RES4_DILATION: 1
148
+ RES5_DILATION: 1
149
+ RES5_MULTI_GRID:
150
+ - 1
151
+ - 1
152
+ - 1
153
+ STEM_OUT_CHANNELS: 64
154
+ STEM_TYPE: basic
155
+ STRIDE_IN_1X1: false
156
+ WIDTH_PER_GROUP: 64
157
+ RETINANET:
158
+ BBOX_REG_LOSS_TYPE: smooth_l1
159
+ BBOX_REG_WEIGHTS: &id002
160
+ - 1.0
161
+ - 1.0
162
+ - 1.0
163
+ - 1.0
164
+ FOCAL_LOSS_ALPHA: 0.25
165
+ FOCAL_LOSS_GAMMA: 2.0
166
+ IN_FEATURES:
167
+ - p3
168
+ - p4
169
+ - p5
170
+ - p6
171
+ - p7
172
+ IOU_LABELS:
173
+ - 0
174
+ - -1
175
+ - 1
176
+ IOU_THRESHOLDS:
177
+ - 0.4
178
+ - 0.5
179
+ NMS_THRESH_TEST: 0.5
180
+ NORM: ''
181
+ NUM_CLASSES: 80
182
+ NUM_CONVS: 4
183
+ PRIOR_PROB: 0.01
184
+ SCORE_THRESH_TEST: 0.05
185
+ SMOOTH_L1_LOSS_BETA: 0.1
186
+ TOPK_CANDIDATES_TEST: 1000
187
+ ROI_BOX_CASCADE_HEAD:
188
+ BBOX_REG_WEIGHTS:
189
+ - &id001
190
+ - 10.0
191
+ - 10.0
192
+ - 5.0
193
+ - 5.0
194
+ - - 20.0
195
+ - 20.0
196
+ - 10.0
197
+ - 10.0
198
+ - - 30.0
199
+ - 30.0
200
+ - 15.0
201
+ - 15.0
202
+ IOUS:
203
+ - 0.5
204
+ - 0.6
205
+ - 0.7
206
+ ROI_BOX_HEAD:
207
+ BBOX_REG_LOSS_TYPE: smooth_l1
208
+ BBOX_REG_LOSS_WEIGHT: 1.0
209
+ BBOX_REG_WEIGHTS: *id001
210
+ CLS_AGNOSTIC_BBOX_REG: false
211
+ CONV_DIM: 256
212
+ FC_DIM: 1024
213
+ FED_LOSS_FREQ_WEIGHT_POWER: 0.5
214
+ FED_LOSS_NUM_CLASSES: 50
215
+ NAME: ''
216
+ NORM: ''
217
+ NUM_CONV: 0
218
+ NUM_FC: 0
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ SMOOTH_L1_BETA: 0.0
223
+ TRAIN_ON_PRED_BOXES: false
224
+ USE_FED_LOSS: false
225
+ USE_SIGMOID_CE: false
226
+ ROI_HEADS:
227
+ BATCH_SIZE_PER_IMAGE: 512
228
+ IN_FEATURES:
229
+ - res4
230
+ IOU_LABELS:
231
+ - 0
232
+ - 1
233
+ IOU_THRESHOLDS:
234
+ - 0.5
235
+ NAME: Res5ROIHeads
236
+ NMS_THRESH_TEST: 0.5
237
+ NUM_CLASSES: 80
238
+ POSITIVE_FRACTION: 0.25
239
+ PROPOSAL_APPEND_GT: true
240
+ SCORE_THRESH_TEST: 0.05
241
+ ROI_KEYPOINT_HEAD:
242
+ CONV_DIMS:
243
+ - 512
244
+ - 512
245
+ - 512
246
+ - 512
247
+ - 512
248
+ - 512
249
+ - 512
250
+ - 512
251
+ LOSS_WEIGHT: 1.0
252
+ MIN_KEYPOINTS_PER_IMAGE: 1
253
+ NAME: KRCNNConvDeconvUpsampleHead
254
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
255
+ NUM_KEYPOINTS: 17
256
+ POOLER_RESOLUTION: 14
257
+ POOLER_SAMPLING_RATIO: 0
258
+ POOLER_TYPE: ROIAlignV2
259
+ ROI_MASK_HEAD:
260
+ CLS_AGNOSTIC_MASK: false
261
+ CONV_DIM: 256
262
+ NAME: MaskRCNNConvUpsampleHead
263
+ NORM: ''
264
+ NUM_CONV: 0
265
+ POOLER_RESOLUTION: 14
266
+ POOLER_SAMPLING_RATIO: 0
267
+ POOLER_TYPE: ROIAlignV2
268
+ RPN:
269
+ BATCH_SIZE_PER_IMAGE: 256
270
+ BBOX_REG_LOSS_TYPE: smooth_l1
271
+ BBOX_REG_LOSS_WEIGHT: 1.0
272
+ BBOX_REG_WEIGHTS: *id002
273
+ BOUNDARY_THRESH: -1
274
+ CONV_DIMS:
275
+ - -1
276
+ HEAD_NAME: StandardRPNHead
277
+ IN_FEATURES:
278
+ - res4
279
+ IOU_LABELS:
280
+ - 0
281
+ - -1
282
+ - 1
283
+ IOU_THRESHOLDS:
284
+ - 0.3
285
+ - 0.7
286
+ LOSS_WEIGHT: 1.0
287
+ NMS_THRESH: 0.7
288
+ POSITIVE_FRACTION: 0.5
289
+ POST_NMS_TOPK_TEST: 1000
290
+ POST_NMS_TOPK_TRAIN: 2000
291
+ PRE_NMS_TOPK_TEST: 6000
292
+ PRE_NMS_TOPK_TRAIN: 12000
293
+ SMOOTH_L1_BETA: 0.0
294
+ SEM_SEG_HEAD:
295
+ ASPP_CHANNELS: 256
296
+ ASPP_DILATIONS:
297
+ - 6
298
+ - 12
299
+ - 18
300
+ ASPP_DROPOUT: 0.1
301
+ COMMON_STRIDE: 4
302
+ CONVS_DIM: 256
303
+ DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES:
304
+ - res3
305
+ - res4
306
+ - res5
307
+ DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS: 8
308
+ DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS: 4
309
+ IGNORE_VALUE: 255
310
+ IN_FEATURES:
311
+ - res2
312
+ - res3
313
+ - res4
314
+ - res5
315
+ LOSS_TYPE: hard_pixel_mining
316
+ LOSS_WEIGHT: 1.0
317
+ MASK_DIM: 256
318
+ NAME: MaskFormerHead
319
+ NORM: GN
320
+ NUM_CLASSES: 1
321
+ PIXEL_DECODER_NAME: MSDeformAttnPixelDecoder
322
+ PROJECT_CHANNELS:
323
+ - 48
324
+ PROJECT_FEATURES:
325
+ - res2
326
+ TRANSFORMER_ENC_LAYERS: 6
327
+ USE_DEPTHWISE_SEPARABLE_CONV: false
328
+ SWIN:
329
+ APE: false
330
+ ATTN_DROP_RATE: 0.0
331
+ DEPTHS:
332
+ - 2
333
+ - 2
334
+ - 18
335
+ - 2
336
+ DROP_PATH_RATE: 0.3
337
+ DROP_RATE: 0.0
338
+ EMBED_DIM: 192
339
+ MLP_RATIO: 4.0
340
+ NUM_HEADS:
341
+ - 6
342
+ - 12
343
+ - 24
344
+ - 48
345
+ OUT_FEATURES:
346
+ - res2
347
+ - res3
348
+ - res4
349
+ - res5
350
+ PATCH_NORM: true
351
+ PATCH_SIZE: 4
352
+ PRETRAIN_IMG_SIZE: 384
353
+ QKV_BIAS: true
354
+ QK_SCALE: null
355
+ USE_CHECKPOINT: false
356
+ WINDOW_SIZE: 12
357
+ WEIGHTS: weights/coco_panoptic_swinl_100ep/model_final.pkl
358
+ OUTPUT_DIR: runs/swinl-all_data-q200+50+50-res1024-bs16-poly50k-nosem-hasnoobj-debug
359
+ SEED: -1
360
+ SOLVER:
361
+ AMP:
362
+ ENABLED: true
363
+ BACKBONE_MULTIPLIER: 0.1
364
+ BASE_LR: 0.0001
365
+ BASE_LR_END: 0.0
366
+ BIAS_LR_FACTOR: 1.0
367
+ CHECKPOINT_PERIOD: 5000
368
+ CLIP_GRADIENTS:
369
+ CLIP_TYPE: full_model
370
+ CLIP_VALUE: 0.01
371
+ ENABLED: true
372
+ NORM_TYPE: 2.0
373
+ GAMMA: 0.1
374
+ IMS_PER_BATCH: 16
375
+ LR_SCHEDULER_NAME: WarmupPolyLR
376
+ MAX_ITER: 100000
377
+ MOMENTUM: 0.9
378
+ NESTEROV: false
379
+ NUM_DECAYS: 3
380
+ OPTIMIZER: ADAMW
381
+ POLY_LR_CONSTANT_ENDING: 0.0
382
+ POLY_LR_POWER: 0.9
383
+ REFERENCE_WORLD_SIZE: 0
384
+ RESCALE_INTERVAL: false
385
+ STEPS:
386
+ - 655556
387
+ - 710184
388
+ WARMUP_FACTOR: 1.0
389
+ WARMUP_ITERS: 10
390
+ WARMUP_METHOD: linear
391
+ WEIGHT_DECAY: 0.05
392
+ WEIGHT_DECAY_BIAS: null
393
+ WEIGHT_DECAY_EMBED: 0.0
394
+ WEIGHT_DECAY_NORM: 0.0
395
+ TEST:
396
+ AUG:
397
+ ENABLED: false
398
+ FLIP: true
399
+ MAX_SIZE: 4000
400
+ MIN_SIZES:
401
+ - 400
402
+ - 500
403
+ - 600
404
+ - 700
405
+ - 800
406
+ - 900
407
+ - 1000
408
+ - 1100
409
+ - 1200
410
+ DETECTIONS_PER_IMAGE: 100
411
+ EVAL_PERIOD: 100000000
412
+ EXPECTED_RESULTS: []
413
+ KEYPOINT_OKS_SIGMAS: []
414
+ PRECISE_BN:
415
+ ENABLED: false
416
+ NUM_ITER: 200
417
+ VERSION: 2
418
+ VIS_PERIOD: 0
419
+ WANDB:
420
+ NAME: swinl-all_data-q200+50+50-res1024-bs16-poly50k-nosem-hasnoobj-debug
421
+ PROJECT: mask2former