wkpark committed on
Commit
00908c1
·
verified ·
1 Parent(s): 839c33b

add configs from the mmdetection repo with minor fixes

Browse files
mmdet/segm/coco_panoptic.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = "CocoPanopticDataset"
3
+ # data_root = 'data/coco/'
4
+
5
+ # Example to use different file client
6
+ # Method 1: simply set the data root and let the file I/O module
7
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
8
+
9
+ data_root = "s3://openmmlab/datasets/detection/coco/"
10
+
11
+ # Method 2: Use `backend_args` (called `file_client_args` in versions before 3.0.0rc6)
12
+ # backend_args = dict(
13
+ # backend='petrel',
14
+ # path_mapping=dict({
15
+ # './data/': 's3://openmmlab/datasets/detection/',
16
+ # 'data/': 's3://openmmlab/datasets/detection/'
17
+ # }))
18
+ backend_args = None
19
+
20
+ train_pipeline = [
21
+ dict(type="LoadImageFromFile", backend_args=backend_args),
22
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
23
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
24
+ dict(type="RandomFlip", prob=0.5),
25
+ dict(type="PackDetInputs"),
26
+ ]
27
+ test_pipeline = [
28
+ dict(type="LoadImageFromFile", backend_args=backend_args),
29
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
30
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
31
+ dict(
32
+ type="PackDetInputs",
33
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
34
+ ),
35
+ ]
36
+
37
+ train_dataloader = dict(
38
+ batch_size=2,
39
+ num_workers=2,
40
+ persistent_workers=True,
41
+ sampler=dict(type="DefaultSampler", shuffle=True),
42
+ batch_sampler=dict(type="AspectRatioBatchSampler"),
43
+ dataset=dict(
44
+ type=dataset_type,
45
+ data_root=data_root,
46
+ ann_file="annotations/panoptic_train2017.json",
47
+ data_prefix=dict(img="train2017/", seg="annotations/panoptic_train2017/"),
48
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
49
+ pipeline=train_pipeline,
50
+ backend_args=backend_args,
51
+ ),
52
+ )
53
+ val_dataloader = dict(
54
+ batch_size=1,
55
+ num_workers=2,
56
+ persistent_workers=True,
57
+ drop_last=False,
58
+ sampler=dict(type="DefaultSampler", shuffle=False),
59
+ dataset=dict(
60
+ type=dataset_type,
61
+ data_root=data_root,
62
+ ann_file="annotations/panoptic_val2017.json",
63
+ data_prefix=dict(img="val2017/", seg="annotations/panoptic_val2017/"),
64
+ test_mode=True,
65
+ pipeline=test_pipeline,
66
+ backend_args=backend_args,
67
+ ),
68
+ )
69
+ test_dataloader = val_dataloader
70
+
71
+ val_evaluator = dict(
72
+ type="CocoPanopticMetric",
73
+ ann_file=data_root + "annotations/panoptic_val2017.json",
74
+ seg_prefix=data_root + "annotations/panoptic_val2017/",
75
+ backend_args=backend_args,
76
+ )
77
+ test_evaluator = val_evaluator
78
+
79
+ # inference on test dataset and
80
+ # format the output results for submission.
81
+ # test_dataloader = dict(
82
+ # batch_size=1,
83
+ # num_workers=1,
84
+ # persistent_workers=True,
85
+ # drop_last=False,
86
+ # sampler=dict(type='DefaultSampler', shuffle=False),
87
+ # dataset=dict(
88
+ # type=dataset_type,
89
+ # data_root=data_root,
90
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
91
+ # data_prefix=dict(img='test2017/'),
92
+ # test_mode=True,
93
+ # pipeline=test_pipeline))
94
+ # test_evaluator = dict(
95
+ # type='CocoPanopticMetric',
96
+ # format_only=True,
97
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
98
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
mmdet/segm/default_runtime.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_scope = 'mmdet'
2
+
3
+ default_hooks = dict(
4
+ timer=dict(type='IterTimerHook'),
5
+ logger=dict(type='LoggerHook', interval=50),
6
+ param_scheduler=dict(type='ParamSchedulerHook'),
7
+ checkpoint=dict(type='CheckpointHook', interval=1),
8
+ sampler_seed=dict(type='DistSamplerSeedHook'),
9
+ visualization=dict(type='mmdet.DetVisualizationHook'))
10
+
11
+ env_cfg = dict(
12
+ cudnn_benchmark=False,
13
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
14
+ dist_cfg=dict(backend='nccl'),
15
+ )
16
+
17
+ vis_backends = [dict(type='LocalVisBackend')]
18
+ visualizer = dict(
19
+ type='mmdet.DetLocalVisualizer',
20
+ vis_backends=vis_backends,
21
+ name='visualizer')
22
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
23
+
24
+ log_level = 'INFO'
25
+ load_from = None
26
+ resume = False
27
+
28
+ # Example to use different file client
29
+ # Method 1: simply set the data root and let the file I/O module
30
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
31
+
32
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
33
+
34
+ # Method 2: Use `backend_args` (called `file_client_args` in versions
35
+ # before MMDet 3.0.0rc6)
36
+ # backend_args = dict(
37
+ # backend='petrel',
38
+ # path_mapping=dict({
39
+ # './data/': 's3://openmmlab/datasets/detection/',
40
+ # 'data/': 's3://openmmlab/datasets/detection/'
41
+ # }))
42
+ #backend_args = None
mmdet/segm/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # copied from https://raw.githubusercontent.com/open-mmlab/mmdetection/main/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py
2
+ _base_ = [
3
+ 'coco_panoptic.py', 'default_runtime.py'
4
+ ]
5
+
6
+ image_size = (1024, 1024)
7
+ batch_augments = [
8
+ dict(
9
+ type='BatchFixedSizePad',
10
+ size=image_size,
11
+ img_pad_value=0,
12
+ pad_mask=True,
13
+ mask_pad_value=0,
14
+ pad_seg=True,
15
+ seg_pad_value=255)
16
+ ]
17
+ data_preprocessor = dict(
18
+ type='DetDataPreprocessor',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ bgr_to_rgb=True,
22
+ pad_size_divisor=32,
23
+ pad_mask=True,
24
+ mask_pad_value=0,
25
+ pad_seg=True,
26
+ seg_pad_value=255,
27
+ batch_augments=batch_augments)
28
+
29
+ num_things_classes = 80
30
+ num_stuff_classes = 53
31
+ num_classes = num_things_classes + num_stuff_classes
32
+ model = dict(
33
+ type='Mask2Former',
34
+ data_preprocessor=data_preprocessor,
35
+ backbone=dict(
36
+ type='ResNet',
37
+ depth=50,
38
+ num_stages=4,
39
+ out_indices=(0, 1, 2, 3),
40
+ frozen_stages=-1,
41
+ norm_cfg=dict(type='BN', requires_grad=False),
42
+ norm_eval=True,
43
+ style='pytorch',
44
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
45
+ panoptic_head=dict(
46
+ type='Mask2FormerHead',
47
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
48
+ strides=[4, 8, 16, 32],
49
+ feat_channels=256,
50
+ out_channels=256,
51
+ num_things_classes=num_things_classes,
52
+ num_stuff_classes=num_stuff_classes,
53
+ num_queries=100,
54
+ num_transformer_feat_level=3,
55
+ pixel_decoder=dict(
56
+ type='MSDeformAttnPixelDecoder',
57
+ num_outs=3,
58
+ norm_cfg=dict(type='GN', num_groups=32),
59
+ act_cfg=dict(type='ReLU'),
60
+ encoder=dict( # DeformableDetrTransformerEncoder
61
+ num_layers=6,
62
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
63
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
64
+ embed_dims=256,
65
+ num_heads=8,
66
+ num_levels=3,
67
+ num_points=4,
68
+ dropout=0.0,
69
+ batch_first=True),
70
+ ffn_cfg=dict(
71
+ embed_dims=256,
72
+ feedforward_channels=1024,
73
+ num_fcs=2,
74
+ ffn_drop=0.0,
75
+ act_cfg=dict(type='ReLU', inplace=True)))),
76
+ positional_encoding=dict(num_feats=128, normalize=True)),
77
+ enforce_decoder_input_project=False,
78
+ positional_encoding=dict(num_feats=128, normalize=True),
79
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
80
+ return_intermediate=True,
81
+ num_layers=9,
82
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
83
+ self_attn_cfg=dict( # MultiheadAttention
84
+ embed_dims=256,
85
+ num_heads=8,
86
+ dropout=0.0,
87
+ batch_first=True),
88
+ cross_attn_cfg=dict( # MultiheadAttention
89
+ embed_dims=256,
90
+ num_heads=8,
91
+ dropout=0.0,
92
+ batch_first=True),
93
+ ffn_cfg=dict(
94
+ embed_dims=256,
95
+ feedforward_channels=2048,
96
+ num_fcs=2,
97
+ ffn_drop=0.0,
98
+ act_cfg=dict(type='ReLU', inplace=True))),
99
+ init_cfg=None),
100
+ loss_cls=dict(
101
+ type='CrossEntropyLoss',
102
+ use_sigmoid=False,
103
+ loss_weight=2.0,
104
+ reduction='mean',
105
+ class_weight=[1.0] * num_classes + [0.1]),
106
+ loss_mask=dict(
107
+ type='CrossEntropyLoss',
108
+ use_sigmoid=True,
109
+ reduction='mean',
110
+ loss_weight=5.0),
111
+ loss_dice=dict(
112
+ type='DiceLoss',
113
+ use_sigmoid=True,
114
+ activate=True,
115
+ reduction='mean',
116
+ naive_dice=True,
117
+ eps=1.0,
118
+ loss_weight=5.0)),
119
+ panoptic_fusion_head=dict(
120
+ type='MaskFormerFusionHead',
121
+ num_things_classes=num_things_classes,
122
+ num_stuff_classes=num_stuff_classes,
123
+ loss_panoptic=None,
124
+ init_cfg=None),
125
+ train_cfg=dict(
126
+ num_points=12544,
127
+ oversample_ratio=3.0,
128
+ importance_sample_ratio=0.75,
129
+ assigner=dict(
130
+ type='HungarianAssigner',
131
+ match_costs=[
132
+ dict(type='ClassificationCost', weight=2.0),
133
+ dict(
134
+ type='CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
135
+ dict(type='DiceCost', weight=5.0, pred_act=True, eps=1.0)
136
+ ]),
137
+ sampler=dict(type='MaskPseudoSampler')),
138
+ test_cfg=dict(
139
+ panoptic_on=True,
140
+ # For now, the dataset does not support
141
+ # evaluating semantic segmentation metric.
142
+ semantic_on=False,
143
+ instance_on=True,
144
+ # max_per_image is for instance segmentation.
145
+ max_per_image=100,
146
+ iou_thr=0.8,
147
+ # In Mask2Former's panoptic postprocessing,
148
+ # it will filter mask area where score is less than 0.5.
149
+ filter_low_score=True),
150
+ init_cfg=None)
151
+
152
+ # dataset settings
153
+ data_root = 'data/coco/'
154
+ train_pipeline = [
155
+ dict(
156
+ type='LoadImageFromFile',
157
+ to_float32=True,
158
+ backend_args={{_base_.backend_args}}),
159
+ dict(
160
+ type='LoadPanopticAnnotations',
161
+ with_bbox=True,
162
+ with_mask=True,
163
+ with_seg=True,
164
+ backend_args={{_base_.backend_args}}),
165
+ dict(type='RandomFlip', prob=0.5),
166
+ # large scale jittering
167
+ dict(
168
+ type='RandomResize',
169
+ scale=image_size,
170
+ ratio_range=(0.1, 2.0),
171
+ keep_ratio=True),
172
+ dict(
173
+ type='RandomCrop',
174
+ crop_size=image_size,
175
+ crop_type='absolute',
176
+ recompute_bbox=True,
177
+ allow_negative_crop=True),
178
+ dict(type='PackDetInputs')
179
+ ]
180
+
181
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
182
+
183
+ val_evaluator = [
184
+ dict(
185
+ type='CocoPanopticMetric',
186
+ ann_file=data_root + 'annotations/panoptic_val2017.json',
187
+ seg_prefix=data_root + 'annotations/panoptic_val2017/',
188
+ backend_args={{_base_.backend_args}}),
189
+ dict(
190
+ type='CocoMetric',
191
+ ann_file=data_root + 'annotations/instances_val2017.json',
192
+ metric=['bbox', 'segm'],
193
+ backend_args={{_base_.backend_args}})
194
+ ]
195
+ test_evaluator = val_evaluator
196
+
197
+ # optimizer
198
+ embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
199
+ optim_wrapper = dict(
200
+ type='OptimWrapper',
201
+ optimizer=dict(
202
+ type='AdamW',
203
+ lr=0.0001,
204
+ weight_decay=0.05,
205
+ eps=1e-8,
206
+ betas=(0.9, 0.999)),
207
+ paramwise_cfg=dict(
208
+ custom_keys={
209
+ 'backbone': dict(lr_mult=0.1, decay_mult=1.0),
210
+ 'query_embed': embed_multi,
211
+ 'query_feat': embed_multi,
212
+ 'level_embed': embed_multi,
213
+ },
214
+ norm_decay_mult=0.0),
215
+ clip_grad=dict(max_norm=0.01, norm_type=2))
216
+
217
+ # learning policy
218
+ max_iters = 368750
219
+ param_scheduler = dict(
220
+ type='MultiStepLR',
221
+ begin=0,
222
+ end=max_iters,
223
+ by_epoch=False,
224
+ milestones=[327778, 355092],
225
+ gamma=0.1)
226
+
227
+ # Before the 365001st iteration, we do evaluation every 5000 iterations.
228
+ # After the 365000th iteration, we do evaluation every 368750 iterations,
229
+ # which means that we do evaluation at the end of training.
230
+ interval = 5000
231
+ dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
232
+ train_cfg = dict(
233
+ type='IterBasedTrainLoop',
234
+ max_iters=max_iters,
235
+ val_interval=interval,
236
+ dynamic_intervals=dynamic_intervals)
237
+ val_cfg = dict(type='ValLoop')
238
+ test_cfg = dict(type='TestLoop')
239
+
240
+ default_hooks = dict(
241
+ checkpoint=dict(
242
+ type='CheckpointHook',
243
+ by_epoch=False,
244
+ save_last=True,
245
+ max_keep_ckpts=3,
246
+ interval=interval))
247
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=False)
248
+
249
+ # Default setting for scaling LR automatically
250
+ # - `enable` means enable scaling LR automatically
251
+ # or not by default.
252
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
253
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
mmdet/segm/mask2former_r50_8xb2-lsj-50e_coco.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['./mask2former_r50_8xb2-lsj-50e_coco-panoptic.py']
2
+
3
+ num_things_classes = 80
4
+ num_stuff_classes = 0
5
+ num_classes = num_things_classes + num_stuff_classes
6
+ image_size = (1024, 1024)
7
+ batch_augments = [
8
+ dict(
9
+ type='BatchFixedSizePad',
10
+ size=image_size,
11
+ img_pad_value=0,
12
+ pad_mask=True,
13
+ mask_pad_value=0,
14
+ pad_seg=False)
15
+ ]
16
+ data_preprocessor = dict(
17
+ type='DetDataPreprocessor',
18
+ mean=[123.675, 116.28, 103.53],
19
+ std=[58.395, 57.12, 57.375],
20
+ bgr_to_rgb=True,
21
+ pad_size_divisor=32,
22
+ pad_mask=True,
23
+ mask_pad_value=0,
24
+ pad_seg=False,
25
+ batch_augments=batch_augments)
26
+ model = dict(
27
+ data_preprocessor=data_preprocessor,
28
+ panoptic_head=dict(
29
+ num_things_classes=num_things_classes,
30
+ num_stuff_classes=num_stuff_classes,
31
+ loss_cls=dict(class_weight=[1.0] * num_classes + [0.1])),
32
+ panoptic_fusion_head=dict(
33
+ num_things_classes=num_things_classes,
34
+ num_stuff_classes=num_stuff_classes),
35
+ test_cfg=dict(panoptic_on=False))
36
+
37
+ # dataset settings
38
+ train_pipeline = [
39
+ dict(
40
+ type='LoadImageFromFile',
41
+ to_float32=True,
42
+ backend_args={{_base_.backend_args}}),
43
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
44
+ dict(type='RandomFlip', prob=0.5),
45
+ # large scale jittering
46
+ dict(
47
+ type='RandomResize',
48
+ scale=image_size,
49
+ ratio_range=(0.1, 2.0),
50
+ resize_type='Resize',
51
+ keep_ratio=True),
52
+ dict(
53
+ type='RandomCrop',
54
+ crop_size=image_size,
55
+ crop_type='absolute',
56
+ recompute_bbox=True,
57
+ allow_negative_crop=True),
58
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True),
59
+ dict(type='PackDetInputs')
60
+ ]
61
+
62
+ test_pipeline = [
63
+ dict(
64
+ type='LoadImageFromFile',
65
+ to_float32=True,
66
+ backend_args={{_base_.backend_args}}),
67
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
68
+ # If you don't have gt annotations, remove the LoadAnnotations step below
69
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
70
+ dict(
71
+ type='PackDetInputs',
72
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
73
+ 'scale_factor'))
74
+ ]
75
+
76
+ dataset_type = 'CocoDataset'
77
+ data_root = 'data/coco/'
78
+
79
+ train_dataloader = dict(
80
+ dataset=dict(
81
+ type=dataset_type,
82
+ ann_file='annotations/instances_train2017.json',
83
+ data_prefix=dict(img='train2017/'),
84
+ pipeline=train_pipeline))
85
+ val_dataloader = dict(
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ ann_file='annotations/instances_val2017.json',
89
+ data_prefix=dict(img='val2017/'),
90
+ pipeline=test_pipeline))
91
+ test_dataloader = val_dataloader
92
+
93
+ val_evaluator = dict(
94
+ _delete_=True,
95
+ type='CocoMetric',
96
+ ann_file=data_root + 'annotations/instances_val2017.json',
97
+ metric=['bbox', 'segm'],
98
+ format_only=False,
99
+ backend_args={{_base_.backend_args}})
100
+ test_evaluator = val_evaluator
mmdet/segm/mmdet_dd-person_mask2former.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # alias config
2
+ _base_ = ['mask2former_r50_8xb2-lsj-50e_coco.py']