xiaofanghf committed on
Commit
5cbf841
·
verified ·
1 Parent(s): 0878892

Delete ViTDet/projects/ViTDet/configs/001a_vitdet_mask-rcnn_vit-b-mae_lsj-100e_ReUtah-112px_HexGrid2_Imgs:small-only_Anno:small-only.py

Browse files
ViTDet/projects/ViTDet/configs/001a_vitdet_mask-rcnn_vit-b-mae_lsj-100e_ReUtah-112px_HexGrid2_Imgs:small-only_Anno:small-only.py DELETED
@@ -1,435 +0,0 @@
1
- _base_ = [
2
- '../../../configs/_base_/default_runtime.py',
3
- '../../../configs/_base_/models/mask-rcnn_r50_fpn.py',
4
- ]
5
-
6
- custom_imports = dict(imports=['projects.ViTDet.vitdet'])
7
-
8
-
9
- ## TRAIN DATASET
10
- data_root_train = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/train/'
11
-
12
-
13
- ## VALIDATION DATASET
14
- data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation_subset025.0_seed0/'
15
-
16
- # TEST DATASET
17
- ## LINZ
18
- data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
19
-
20
- ## Utah
21
- # data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-Utah_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
22
-
23
-
24
- train_batch_size_per_gpu = 24
25
- val_batch_size_per_gpu = 12
26
- test_batch_size_per_gpu = 60
27
-
28
- num_workers = 8
29
-
30
- max_epochs = 100
31
-
32
-
33
- # img_scale = (1024, 1024)
34
- # img_scale = (384, 384)
35
- img_scale = (128, 128)
36
-
37
- affine_scale = 0.9
38
-
39
- class_name = ('small',)
40
- num_classes = len(class_name)
41
- metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
42
-
43
-
44
-
45
- load_from = 'https://download.openmmlab.com/mmdetection/v3.0/vitdet/vitdet_mask-rcnn_vit-b-mae_lsj-100e/vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
46
-
47
-
48
- # MODEL SETTINGS
49
- backbone_norm_cfg = dict(type='LN', requires_grad=True)
50
- norm_cfg = dict(type='LN2d', requires_grad=True)
51
-
52
- batch_augments = [
53
- dict(type='BatchFixedSizePad', size=img_scale, pad_mask=True)
54
- ]
55
-
56
- model = dict(
57
- data_preprocessor=dict(pad_size_divisor=32, batch_augments=batch_augments),
58
- backbone=dict(
59
- _delete_=True,
60
- type='ViT',
61
- # img_size=1024,
62
- # img_size=384,
63
- img_size=img_scale[0],
64
- patch_size=16,
65
- embed_dim=768,
66
- depth=12,
67
- num_heads=12,
68
- drop_path_rate=0.1,
69
- window_size=14,
70
- mlp_ratio=4,
71
- qkv_bias=True,
72
- norm_cfg=backbone_norm_cfg,
73
- window_block_indexes=[
74
- 0,
75
- 1,
76
- 3,
77
- 4,
78
- 6,
79
- 7,
80
- 9,
81
- 10,
82
- ],
83
- use_rel_pos=True,
84
- init_cfg=dict(
85
- type='Pretrained',
86
- # checkpoint='mae_pretrain_vit_base.pth'
87
- # checkpoint='detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth'
88
- checkpoint='vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
89
- )
90
- ),
91
- neck=dict(
92
- _delete_=True,
93
- type='SimpleFPN',
94
- backbone_channel=768,
95
- in_channels=[192, 384, 768, 768],
96
- out_channels=256,
97
- num_outs=5,
98
- norm_cfg=norm_cfg),
99
- rpn_head=dict(num_convs=2),
100
- roi_head=dict(
101
- bbox_head=dict(
102
- type='Shared4Conv1FCBBoxHead',
103
- conv_out_channels=256,
104
- norm_cfg=norm_cfg,
105
- num_classes=num_classes
106
- ),
107
- # mask_head=dict( # No masks are used
108
- # norm_cfg=norm_cfg,
109
- # num_classes=1,
110
- # loss_mask=dict(
111
- # use_mask=False
112
- # ),
113
- # )
114
- mask_head=None
115
- )
116
- )
117
-
118
- custom_hooks = [dict(type='Fp16CompresssionHook')]
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
- ##
128
- dataset_type = 'CocoDataset'
129
- backend_args = None
130
-
131
- # Original
132
- # train_pipeline = [
133
- # dict(type='LoadImageFromFile', backend_args=backend_args),
134
- # dict(
135
- # type='LoadAnnotations',
136
- # with_bbox=True,
137
- # # with_mask=True
138
- # with_mask=False
139
- # ),
140
- # dict(type='RandomFlip', prob=0.5),
141
- # dict(
142
- # type='RandomResize',
143
- # scale=img_scale,
144
- # ratio_range=(0.1, 2.0),
145
- # keep_ratio=True),
146
- # dict(
147
- # type='RandomCrop',
148
- # crop_type='absolute_range',
149
- # crop_size=img_scale,
150
- # recompute_bbox=True,
151
- # allow_negative_crop=True),
152
- # dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
153
- # dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
154
- # dict(type='PackDetInputs')
155
- # ]
156
-
157
- pre_transform = [
158
- dict(type='LoadImageFromFile', backend_args=backend_args),
159
- dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
160
- ]
161
-
162
- albu_train_transforms = [
163
- dict(type='Blur', p=0.01),
164
- dict(type='MedianBlur', p=0.01),
165
- dict(type='ToGray', p=0.01),
166
- dict(type='CLAHE', p=0.01)
167
- ]
168
-
169
- last_transform = [
170
- dict(
171
- type='Albu',
172
- transforms=albu_train_transforms,
173
- bbox_params=dict(
174
- type='BboxParams',
175
- format='pascal_voc',
176
- label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
177
- keymap={
178
- 'img': 'image',
179
- 'gt_bboxes': 'bboxes'
180
- }),
181
- dict(type='YOLOXHSVRandomAug'), # ???
182
- dict(type='RandomFlip', prob=0.5),
183
- dict(
184
- type='PackDetInputs',
185
- meta_keys=(
186
- 'img_id',
187
- 'img_path',
188
- 'ori_shape',
189
- 'img_shape',
190
- 'flip',
191
- 'flip_direction'
192
- )
193
- )
194
- ]
195
-
196
- mosaic_affine_transform = [
197
- dict(
198
- type='Mosaic',
199
- img_scale=img_scale,
200
- pad_val=114.0,
201
- ),
202
- dict(
203
- type='RandomAffine',
204
- max_rotate_degree=0.0,
205
- max_shear_degree=0.0,
206
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
207
- # img_scale is (width, height)
208
- border=(-img_scale[0] // 2, -img_scale[1] // 2),
209
- border_val=(114, 114, 114))
210
- ]
211
-
212
- train_pipeline = [
213
- *pre_transform,
214
- *mosaic_affine_transform,
215
- dict(
216
- type='MixUp',
217
- img_scale=img_scale,
218
- ),
219
- *last_transform
220
- ]
221
-
222
-
223
-
224
- # Original
225
- # train_dataloader = dict(
226
- # batch_size=train_batch_size_per_gpu,
227
- # num_workers=num_workers,
228
- # persistent_workers=True,
229
- # sampler=dict(type='DefaultSampler', shuffle=True),
230
- # dataset=dict(
231
- # type=dataset_type,
232
- # data_root=data_root_train,
233
- # ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
234
- # data_prefix=dict(img='images/'),
235
- # # filter_cfg=dict(filter_empty_gt=True, min_size=32),
236
- # filter_cfg=dict(filter_empty_gt=False),
237
- # pipeline=train_pipeline,
238
- # metainfo=metainfo,
239
- # )
240
- # )
241
-
242
- train_dataloader = dict(
243
- batch_size=train_batch_size_per_gpu,
244
- num_workers=num_workers,
245
- persistent_workers=True,
246
- sampler=dict(type='DefaultSampler', shuffle=True),
247
- batch_sampler=dict(type='AspectRatioBatchSampler'),
248
- dataset=dict(
249
- # _delete_=True,
250
- type='MultiImageMixDataset',
251
- dataset=dict(
252
- type=dataset_type,
253
- data_root=data_root_train,
254
- ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
255
- data_prefix=dict(img='images/'),
256
- filter_cfg=dict(filter_empty_gt=False, min_size=32),
257
- metainfo=metainfo,
258
- backend_args=backend_args,
259
- pipeline=pre_transform
260
- ),
261
- pipeline=train_pipeline,
262
- )
263
- )
264
-
265
-
266
- test_pipeline = [
267
- dict(type='LoadImageFromFile', backend_args=backend_args),
268
- dict(type='Resize', scale=img_scale, keep_ratio=True),
269
- dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
270
- dict(
271
- type='LoadAnnotations',
272
- with_bbox=True,
273
- # with_mask=True
274
- with_mask=False
275
- ),
276
- dict(
277
- type='PackDetInputs',
278
- meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
279
- 'scale_factor'))
280
- ]
281
-
282
- val_dataloader = dict(
283
- batch_size=val_batch_size_per_gpu,
284
- num_workers=num_workers,
285
- persistent_workers=True,
286
- drop_last=False,
287
- sampler=dict(type='DefaultSampler', shuffle=False),
288
- dataset=dict(
289
- type=dataset_type,
290
- data_root=data_root_val,
291
- ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
292
- data_prefix=dict(img='images/'),
293
- test_mode=True,
294
- pipeline=test_pipeline,
295
- metainfo=metainfo,
296
- )
297
- )
298
- # test_dataloader = val_dataloader
299
- test_dataloader = dict(
300
- batch_size=test_batch_size_per_gpu,
301
- num_workers=num_workers,
302
- persistent_workers=True,
303
- drop_last=False,
304
- sampler=dict(type='DefaultSampler', shuffle=False),
305
- dataset=dict(
306
- type=dataset_type,
307
- data_root=data_root_test,
308
- ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
309
- data_prefix=dict(img='images/'),
310
- test_mode=True,
311
- pipeline=test_pipeline,
312
- metainfo=metainfo,
313
- )
314
- )
315
-
316
- val_evaluator = dict(
317
- type='CocoMetric',
318
- ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
319
- metric='bbox',
320
- format_only=False)
321
- # test_evaluator = val_evaluator
322
- test_evaluator = dict(
323
- type='CocoMetric',
324
- ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
325
- metric='bbox',
326
- format_only=False
327
- )
328
-
329
- optim_wrapper = dict(
330
- type='AmpOptimWrapper',
331
- constructor='LayerDecayOptimizerConstructor',
332
- paramwise_cfg={
333
- 'decay_rate': 0.7,
334
- 'decay_type': 'layer_wise',
335
- 'num_layers': 12,
336
- },
337
- optimizer=dict(
338
- type='AdamW',
339
- # lr=0.0001,
340
- # lr=0.01,
341
- lr=0.001,
342
- betas=(0.9, 0.999),
343
- weight_decay=0.1,
344
- ))
345
-
346
- # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
347
- # max_iters = 184375
348
- # interval = 5000
349
- max_iters = 100000
350
-
351
- # interval = 2000
352
- interval = 1000
353
-
354
- dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
355
- param_scheduler = [
356
- dict(
357
- type='LinearLR',
358
- start_factor=0.001,
359
- by_epoch=False,
360
- begin=0,
361
- end=250
362
- ),
363
- dict(
364
- type='MultiStepLR',
365
- begin=0,
366
-
367
- end=max_iters,
368
- # end=max_epochs,
369
-
370
- by_epoch=False,
371
- # by_epoch=True,
372
-
373
- # 88 ep = 163889 iters * 64 images/iter / 118000 images/ep
374
- # 96 ep = 177546 iters * 64 images/iter / 118000 images/ep
375
- # milestones=[20, 29],
376
- # milestones=[5000, 6000],
377
- milestones=[1000, 2000],
378
- gamma=0.1
379
- )
380
- ]
381
-
382
- train_cfg = dict(
383
- type='IterBasedTrainLoop',
384
- max_iters=max_iters,
385
- val_interval=interval,
386
- dynamic_intervals=dynamic_intervals
387
- )
388
- # train_cfg = dict(
389
- # type='EpochBasedTrainLoop',
390
- # max_epochs=max_epochs,
391
- # val_interval=1
392
- # )
393
-
394
- val_cfg = dict(type='ValLoop')
395
- test_cfg = dict(type='TestLoop')
396
-
397
- default_hooks = dict(
398
- logger=dict(
399
- type='LoggerHook',
400
- interval=50,
401
- log_metric_by_epoch=False
402
- ),
403
- checkpoint=dict(
404
- type='CheckpointHook',
405
- by_epoch=False,
406
- # by_epoch=True,
407
- save_last=True,
408
- # interval=1,
409
- interval=interval,
410
- save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50'],
411
- max_keep_ckpts=2
412
- )
413
- )
414
-
415
- vis_backends = [
416
- dict(type='LocalVisBackend'),
417
- dict(type='TensorboardVisBackend')
418
- ]
419
-
420
- visualizer = dict(
421
- type='DetLocalVisualizer',
422
- vis_backends=vis_backends,
423
- name='visualizer'
424
- )
425
-
426
- log_processor = dict(
427
- type='LogProcessor',
428
- window_size=50,
429
- by_epoch=False
430
- # by_epoch=True
431
- )
432
-
433
- auto_scale_lr = dict(base_batch_size=64)
434
-
435
-