xiaofanghf commited on
Commit
0966166
·
verified ·
1 Parent(s): 6ef8d2f

Upload folder using huggingface_hub

Browse files
FasterRCNN/best_coco_bbox_mAP_50_epoch_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa172df9b6e3ddb3c258aabba90cb970cc44fe3037bdf4f81e361fa0a45ba1fc
3
+ size 165917385
FasterRCNN/configs/faster_rcnn/faster-rcnn_r50_fpn_2x_coco_ReLINZ_112px_RndSmpl_Imgs:all_Anno:small-only_Augm_Val:LINZ.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/faster-rcnn_r50_fpn.py',
3
+ '../_base_/datasets/coco_detection.py',
4
+ '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
5
+ ]
6
+
7
+
8
+
9
+ # TRAIN DATASET
10
+ data_root_train = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/train/'
11
+
12
+ # VAL DATASET
13
+ # data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation/'
14
+ data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation_subset025.0_seed0/'
15
+
16
+ # TEST DATASET
17
+ ## LINZ
18
+ data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
19
+
20
+ ## Utah
21
+ # data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-Utah_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
22
+
23
+
24
+ max_epochs = 1000 # 40
25
+ train_batch_size_per_gpu = 64
26
+ validation_batch_size_per_gpu = 64
27
+ test_batch_size_per_gpu = 64
28
+ num_workers = 8
29
+
30
+
31
+ class_name = ('small',)
32
+ num_classes = len(class_name)
33
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
34
+
35
+ img_scale = (128, 128)
36
+
37
+ affine_scale = 0.9
38
+
39
+ load_from = 'https://download.openxlab.org.cn/models/mmdetection/FasterR-CNN/weight/faster-rcnn_r50_fpn_2x_coco'
40
+
41
+
42
+ # model settings
43
+ model = dict(
44
+ type='FasterRCNN',
45
+ data_preprocessor=dict(
46
+ type='DetDataPreprocessor',
47
+ mean=[123.675, 116.28, 103.53],
48
+ std=[58.395, 57.12, 57.375],
49
+ bgr_to_rgb=True,
50
+ pad_size_divisor=32),
51
+ backbone=dict(
52
+ type='ResNet',
53
+ depth=50,
54
+ num_stages=4,
55
+ out_indices=(0, 1, 2, 3),
56
+ frozen_stages=1,
57
+ norm_cfg=dict(type='BN', requires_grad=True),
58
+ norm_eval=True,
59
+ style='pytorch',
60
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
61
+ neck=dict(
62
+ type='FPN',
63
+ in_channels=[256, 512, 1024, 2048],
64
+ out_channels=256,
65
+ num_outs=5),
66
+ rpn_head=dict(
67
+ type='RPNHead',
68
+ in_channels=256,
69
+ feat_channels=256,
70
+ anchor_generator=dict(
71
+ type='AnchorGenerator',
72
+ scales=[8],
73
+ ratios=[0.5, 1.0, 2.0],
74
+ strides=[4, 8, 16, 32, 64]),
75
+ bbox_coder=dict(
76
+ type='DeltaXYWHBBoxCoder',
77
+ target_means=[.0, .0, .0, .0],
78
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
79
+ loss_cls=dict(
80
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
81
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
82
+ roi_head=dict(
83
+ type='StandardRoIHead',
84
+ bbox_roi_extractor=dict(
85
+ type='SingleRoIExtractor',
86
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
87
+ out_channels=256,
88
+ featmap_strides=[4, 8, 16, 32]),
89
+ bbox_head=dict(
90
+ type='Shared2FCBBoxHead',
91
+ in_channels=256,
92
+ fc_out_channels=1024,
93
+ roi_feat_size=7,
94
+ num_classes=num_classes,
95
+ bbox_coder=dict(
96
+ type='DeltaXYWHBBoxCoder',
97
+ target_means=[0., 0., 0., 0.],
98
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
99
+ reg_class_agnostic=False,
100
+ loss_cls=dict(
101
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
102
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
103
+ # model training and testing settings
104
+ train_cfg=dict(
105
+ rpn=dict(
106
+ assigner=dict(
107
+ type='MaxIoUAssigner',
108
+ pos_iou_thr=0.7,
109
+ neg_iou_thr=0.3,
110
+ min_pos_iou=0.3,
111
+ match_low_quality=True,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type='RandomSampler',
115
+ num=256,
116
+ pos_fraction=0.5,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=False),
119
+ allowed_border=-1,
120
+ pos_weight=-1,
121
+ debug=False),
122
+ rpn_proposal=dict(
123
+ nms_pre=2000,
124
+ max_per_img=1000,
125
+ nms=dict(type='nms', iou_threshold=0.7),
126
+ min_bbox_size=0),
127
+ rcnn=dict(
128
+ assigner=dict(
129
+ type='MaxIoUAssigner',
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=False,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type='RandomSampler',
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ pos_weight=-1,
142
+ debug=False)),
143
+ test_cfg=dict(
144
+ rpn=dict(
145
+ nms_pre=1000,
146
+ max_per_img=1000,
147
+ nms=dict(type='nms', iou_threshold=0.7),
148
+ min_bbox_size=0),
149
+ rcnn=dict(
150
+ score_thr=0.05,
151
+ nms=dict(type='nms', iou_threshold=0.5),
152
+ max_per_img=100)
153
+ # soft-nms is also supported for rcnn testing
154
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
155
+ ))
156
+
157
+
158
+
159
+ dataset_type = 'CocoDataset'
160
+
161
+ backend_args = None
162
+
163
+ # Original
164
+ # train_pipeline = [
165
+ # dict(type='LoadImageFromFile', backend_args=backend_args),
166
+ # dict(type='LoadAnnotations', with_bbox=True),
167
+ # dict(type='Resize', scale=img_scale, keep_ratio=True),
168
+ # dict(type='RandomFlip', prob=0.5),
169
+ # dict(type='PackDetInputs')
170
+ # ]
171
+
172
+ pre_transform = [
173
+ dict(type='LoadImageFromFile', backend_args=backend_args),
174
+ dict(type='LoadAnnotations', with_bbox=True)
175
+ ]
176
+
177
+ albu_train_transforms = [
178
+ dict(type='Blur', p=0.01),
179
+ dict(type='MedianBlur', p=0.01),
180
+ dict(type='ToGray', p=0.01),
181
+ dict(type='CLAHE', p=0.01)
182
+ ]
183
+
184
+ last_transform = [
185
+ dict(
186
+ type='Albu',
187
+ transforms=albu_train_transforms,
188
+ bbox_params=dict(
189
+ type='BboxParams',
190
+ format='pascal_voc',
191
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
192
+ keymap={
193
+ 'img': 'image',
194
+ 'gt_bboxes': 'bboxes'
195
+ }),
196
+ dict(type='YOLOXHSVRandomAug'), # ???
197
+ dict(type='RandomFlip', prob=0.5),
198
+ dict(
199
+ type='PackDetInputs',
200
+ meta_keys=(
201
+ 'img_id',
202
+ 'img_path',
203
+ 'ori_shape',
204
+ 'img_shape',
205
+ 'flip',
206
+ 'flip_direction'
207
+ )
208
+ )
209
+ ]
210
+
211
+ mosaic_affine_transform = [
212
+ dict(
213
+ type='Mosaic',
214
+ img_scale=img_scale,
215
+ pad_val=114.0,
216
+ ),
217
+ dict(
218
+ type='RandomAffine',
219
+ max_rotate_degree=0.0,
220
+ max_shear_degree=0.0,
221
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
222
+ # img_scale is (width, height)
223
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
224
+ border_val=(114, 114, 114))
225
+ ]
226
+
227
+ train_pipeline = [
228
+ *pre_transform,
229
+ *mosaic_affine_transform,
230
+ dict(
231
+ type='MixUp',
232
+ img_scale=img_scale,
233
+ ),
234
+ *last_transform
235
+ ]
236
+
237
+
238
+ # Original
239
+ # train_dataloader = dict(
240
+ # batch_size=train_batch_size_per_gpu,
241
+ # num_workers=num_workers,
242
+ # persistent_workers=True,
243
+ # sampler=dict(type='DefaultSampler', shuffle=True),
244
+ # batch_sampler=dict(type='AspectRatioBatchSampler'),
245
+ # dataset=dict(
246
+ # type=dataset_type,
247
+ # data_root=data_root_train,
248
+ # ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500_BalancedRatio:0.2000.json',
249
+ # data_prefix=dict(img='images/'),
250
+ # filter_cfg=dict(filter_empty_gt=False, min_size=32),
251
+ # pipeline=train_pipeline,
252
+ # metainfo=metainfo,
253
+ # backend_args=backend_args
254
+ # )
255
+ # )
256
+
257
+ train_dataloader = dict(
258
+ batch_size=train_batch_size_per_gpu,
259
+ num_workers=num_workers,
260
+ persistent_workers=True,
261
+ sampler=dict(type='DefaultSampler', shuffle=True),
262
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
263
+ dataset=dict(
264
+ _delete_=True,
265
+ type='MultiImageMixDataset',
266
+ dataset=dict(
267
+ type=dataset_type,
268
+ data_root=data_root_train,
269
+ # ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
270
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500_BalancedRatio:0.2000.json',
271
+ data_prefix=dict(img='images/'),
272
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
273
+ metainfo=metainfo,
274
+ backend_args=backend_args,
275
+ pipeline=pre_transform
276
+ ),
277
+ pipeline=train_pipeline,
278
+ )
279
+ )
280
+
281
+
282
+
283
+ test_pipeline = [
284
+ dict(type='LoadImageFromFile', backend_args=backend_args),
285
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
286
+ # If you don't have a gt annotation, delete the pipeline
287
+ dict(type='LoadAnnotations', with_bbox=True),
288
+ dict(
289
+ type='PackDetInputs',
290
+ meta_keys=(
291
+ 'img_id', 'img_path', 'ori_shape', 'img_shape',
292
+ 'scale_factor'
293
+ )
294
+ )
295
+ ]
296
+
297
+
298
+ val_dataloader = dict(
299
+ batch_size=validation_batch_size_per_gpu,
300
+ num_workers=num_workers,
301
+ persistent_workers=True,
302
+ drop_last=False,
303
+ sampler=dict(type='DefaultSampler', shuffle=False),
304
+ dataset=dict(
305
+ type=dataset_type,
306
+ data_root=data_root_val,
307
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
308
+ data_prefix=dict(img='images/'),
309
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
310
+ test_mode=True,
311
+ pipeline=test_pipeline,
312
+ metainfo=metainfo,
313
+ backend_args=backend_args
314
+ )
315
+ )
316
+
317
+ test_dataloader = dict(
318
+ batch_size=test_batch_size_per_gpu,
319
+ num_workers=num_workers,
320
+ persistent_workers=True,
321
+ drop_last=False,
322
+ sampler=dict(type='DefaultSampler', shuffle=False),
323
+ dataset=dict(
324
+ type=dataset_type,
325
+ data_root=data_root_test,
326
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
327
+ data_prefix=dict(img='images/'),
328
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
329
+ test_mode=True,
330
+ pipeline=test_pipeline,
331
+ metainfo=metainfo,
332
+ backend_args=backend_args
333
+ )
334
+ )
335
+
336
+ # test_dataloader = val_dataloader
337
+
338
+ val_evaluator = dict(
339
+ type='CocoMetric',
340
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
341
+ metric='bbox',
342
+ format_only=False,
343
+ backend_args=backend_args
344
+ )
345
+
346
+ test_evaluator = dict(
347
+ type='CocoMetric',
348
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
349
+ metric='bbox',
350
+ format_only=False,
351
+ backend_args=backend_args
352
+ )
353
+
354
+ # test_evaluator = val_evaluator
355
+
356
+
357
+
358
+ # training schedule for 2x
359
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
360
+ val_cfg = dict(type='ValLoop')
361
+ test_cfg = dict(type='TestLoop')
362
+
363
+ # learning rate
364
+ param_scheduler = [
365
+ dict(
366
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
367
+ dict(
368
+ type='MultiStepLR',
369
+ begin=0,
370
+ end=max_epochs,
371
+ by_epoch=True,
372
+ milestones=[16, 22],
373
+ gamma=0.1)
374
+ ]
375
+
376
+ # optimizer
377
+ optim_wrapper = dict(
378
+ type='OptimWrapper',
379
+ optimizer=dict(
380
+ type='SGD',
381
+ lr=0.2,
382
+ momentum=0.9,
383
+ weight_decay=0.0001
384
+ )
385
+ )
386
+
387
+ # Default setting for scaling LR automatically
388
+ # - `enable` means enable scaling LR automatically
389
+ # or not by default.
390
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU)
391
+ auto_scale_lr = dict(enable=False, base_batch_size=train_batch_size_per_gpu)
392
+
393
+
394
+ default_hooks = dict(
395
+ checkpoint=dict(
396
+ interval=1,
397
+ max_keep_ckpts=1,
398
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
399
+ ),
400
+ # The warmup_mim_iter parameter is critical.
401
+ # The default value is 1000 which is not suitable for cat datasets.
402
+ # param_scheduler=dict(
403
+ # max_epochs=max_epochs,
404
+ # warmup_mim_iter=1000,
405
+ # lr_factor=lr_factor
406
+ # ),
407
+ logger=dict(type='LoggerHook', interval=5))
408
+
409
+ vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]
410
+ visualizer = dict(
411
+ type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
412
+
ViTDet/best_coco_bbox_mAP_50_iter_16000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d60857c5f22007ce2a8711bcf3d27a77818888fbea48ff8b529dd6f5ee7b397
3
+ size 434474871
ViTDet/projects/ViTDet/configs/001a_vitdet_mask-rcnn_vit-b-mae_lsj-100e_ReUtah-112px_HexGrid2_Imgs:small-only_Anno:small-only.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../../../configs/_base_/default_runtime.py',
3
+ '../../../configs/_base_/models/mask-rcnn_r50_fpn.py',
4
+ ]
5
+
6
+ custom_imports = dict(imports=['projects.ViTDet.vitdet'])
7
+
8
+
9
+ ## TRAIN DATASET
10
+ data_root_train = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/train/'
11
+
12
+
13
+ ## VALIDATION DATASET
14
+ data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation_subset025.0_seed0/'
15
+
16
+ # TEST DATASET
17
+ ## LINZ
18
+ data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
19
+
20
+ ## Utah
21
+ # data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-Utah_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
22
+
23
+
24
+ train_batch_size_per_gpu = 24
25
+ val_batch_size_per_gpu = 12
26
+ test_batch_size_per_gpu = 60
27
+
28
+ num_workers = 8
29
+
30
+ max_epochs = 100
31
+
32
+
33
+ # img_scale = (1024, 1024)
34
+ # img_scale = (384, 384)
35
+ img_scale = (128, 128)
36
+
37
+ affine_scale = 0.9
38
+
39
+ class_name = ('small',)
40
+ num_classes = len(class_name)
41
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
42
+
43
+
44
+
45
+ load_from = 'https://download.openmmlab.com/mmdetection/v3.0/vitdet/vitdet_mask-rcnn_vit-b-mae_lsj-100e/vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
46
+
47
+
48
+ # MODEL SETTINGS
49
+ backbone_norm_cfg = dict(type='LN', requires_grad=True)
50
+ norm_cfg = dict(type='LN2d', requires_grad=True)
51
+
52
+ batch_augments = [
53
+ dict(type='BatchFixedSizePad', size=img_scale, pad_mask=True)
54
+ ]
55
+
56
+ model = dict(
57
+ data_preprocessor=dict(pad_size_divisor=32, batch_augments=batch_augments),
58
+ backbone=dict(
59
+ _delete_=True,
60
+ type='ViT',
61
+ # img_size=1024,
62
+ # img_size=384,
63
+ img_size=img_scale[0],
64
+ patch_size=16,
65
+ embed_dim=768,
66
+ depth=12,
67
+ num_heads=12,
68
+ drop_path_rate=0.1,
69
+ window_size=14,
70
+ mlp_ratio=4,
71
+ qkv_bias=True,
72
+ norm_cfg=backbone_norm_cfg,
73
+ window_block_indexes=[
74
+ 0,
75
+ 1,
76
+ 3,
77
+ 4,
78
+ 6,
79
+ 7,
80
+ 9,
81
+ 10,
82
+ ],
83
+ use_rel_pos=True,
84
+ init_cfg=dict(
85
+ type='Pretrained',
86
+ # checkpoint='mae_pretrain_vit_base.pth'
87
+ # checkpoint='detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth'
88
+ checkpoint='vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
89
+ )
90
+ ),
91
+ neck=dict(
92
+ _delete_=True,
93
+ type='SimpleFPN',
94
+ backbone_channel=768,
95
+ in_channels=[192, 384, 768, 768],
96
+ out_channels=256,
97
+ num_outs=5,
98
+ norm_cfg=norm_cfg),
99
+ rpn_head=dict(num_convs=2),
100
+ roi_head=dict(
101
+ bbox_head=dict(
102
+ type='Shared4Conv1FCBBoxHead',
103
+ conv_out_channels=256,
104
+ norm_cfg=norm_cfg,
105
+ num_classes=num_classes
106
+ ),
107
+ # mask_head=dict( # No masks as used
108
+ # norm_cfg=norm_cfg,
109
+ # num_classes=1,
110
+ # loss_mask=dict(
111
+ # use_mask=False
112
+ # ),
113
+ # )
114
+ mask_head=None
115
+ )
116
+ )
117
+
118
+ custom_hooks = [dict(type='Fp16CompresssionHook')]
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+ ##
128
+ dataset_type = 'CocoDataset'
129
+ backend_args = None
130
+
131
+ # Original
132
+ # train_pipeline = [
133
+ # dict(type='LoadImageFromFile', backend_args=backend_args),
134
+ # dict(
135
+ # type='LoadAnnotations',
136
+ # with_bbox=True,
137
+ # # with_mask=True
138
+ # with_mask=False
139
+ # ),
140
+ # dict(type='RandomFlip', prob=0.5),
141
+ # dict(
142
+ # type='RandomResize',
143
+ # scale=img_scale,
144
+ # ratio_range=(0.1, 2.0),
145
+ # keep_ratio=True),
146
+ # dict(
147
+ # type='RandomCrop',
148
+ # crop_type='absolute_range',
149
+ # crop_size=img_scale,
150
+ # recompute_bbox=True,
151
+ # allow_negative_crop=True),
152
+ # dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
153
+ # dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
154
+ # dict(type='PackDetInputs')
155
+ # ]
156
+
157
+ pre_transform = [
158
+ dict(type='LoadImageFromFile', backend_args=backend_args),
159
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
160
+ ]
161
+
162
+ albu_train_transforms = [
163
+ dict(type='Blur', p=0.01),
164
+ dict(type='MedianBlur', p=0.01),
165
+ dict(type='ToGray', p=0.01),
166
+ dict(type='CLAHE', p=0.01)
167
+ ]
168
+
169
+ last_transform = [
170
+ dict(
171
+ type='Albu',
172
+ transforms=albu_train_transforms,
173
+ bbox_params=dict(
174
+ type='BboxParams',
175
+ format='pascal_voc',
176
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
177
+ keymap={
178
+ 'img': 'image',
179
+ 'gt_bboxes': 'bboxes'
180
+ }),
181
+ dict(type='YOLOXHSVRandomAug'), # ???
182
+ dict(type='RandomFlip', prob=0.5),
183
+ dict(
184
+ type='PackDetInputs',
185
+ meta_keys=(
186
+ 'img_id',
187
+ 'img_path',
188
+ 'ori_shape',
189
+ 'img_shape',
190
+ 'flip',
191
+ 'flip_direction'
192
+ )
193
+ )
194
+ ]
195
+
196
+ mosaic_affine_transform = [
197
+ dict(
198
+ type='Mosaic',
199
+ img_scale=img_scale,
200
+ pad_val=114.0,
201
+ ),
202
+ dict(
203
+ type='RandomAffine',
204
+ max_rotate_degree=0.0,
205
+ max_shear_degree=0.0,
206
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
207
+ # img_scale is (width, height)
208
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
209
+ border_val=(114, 114, 114))
210
+ ]
211
+
212
+ train_pipeline = [
213
+ *pre_transform,
214
+ *mosaic_affine_transform,
215
+ dict(
216
+ type='MixUp',
217
+ img_scale=img_scale,
218
+ ),
219
+ *last_transform
220
+ ]
221
+
222
+
223
+
224
+ # Original
225
+ # train_dataloader = dict(
226
+ # batch_size=train_batch_size_per_gpu,
227
+ # num_workers=num_workers,
228
+ # persistent_workers=True,
229
+ # sampler=dict(type='DefaultSampler', shuffle=True),
230
+ # dataset=dict(
231
+ # type=dataset_type,
232
+ # data_root=data_root_train,
233
+ # ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
234
+ # data_prefix=dict(img='images/'),
235
+ # # filter_cfg=dict(filter_empty_gt=True, min_size=32),
236
+ # filter_cfg=dict(filter_empty_gt=False),
237
+ # pipeline=train_pipeline,
238
+ # metainfo=metainfo,
239
+ # )
240
+ # )
241
+
242
+ train_dataloader = dict(
243
+ batch_size=train_batch_size_per_gpu,
244
+ num_workers=num_workers,
245
+ persistent_workers=True,
246
+ sampler=dict(type='DefaultSampler', shuffle=True),
247
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
248
+ dataset=dict(
249
+ # _delete_=True,
250
+ type='MultiImageMixDataset',
251
+ dataset=dict(
252
+ type=dataset_type,
253
+ data_root=data_root_train,
254
+ ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
255
+ data_prefix=dict(img='images/'),
256
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
257
+ metainfo=metainfo,
258
+ backend_args=backend_args,
259
+ pipeline=pre_transform
260
+ ),
261
+ pipeline=train_pipeline,
262
+ )
263
+ )
264
+
265
+
266
+ test_pipeline = [
267
+ dict(type='LoadImageFromFile', backend_args=backend_args),
268
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
269
+ dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
270
+ dict(
271
+ type='LoadAnnotations',
272
+ with_bbox=True,
273
+ # with_mask=True
274
+ with_mask=False
275
+ ),
276
+ dict(
277
+ type='PackDetInputs',
278
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
279
+ 'scale_factor'))
280
+ ]
281
+
282
+ val_dataloader = dict(
283
+ batch_size=val_batch_size_per_gpu,
284
+ num_workers=num_workers,
285
+ persistent_workers=True,
286
+ drop_last=False,
287
+ sampler=dict(type='DefaultSampler', shuffle=False),
288
+ dataset=dict(
289
+ type=dataset_type,
290
+ data_root=data_root_val,
291
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
292
+ data_prefix=dict(img='images/'),
293
+ test_mode=True,
294
+ pipeline=test_pipeline,
295
+ metainfo=metainfo,
296
+ )
297
+ )
298
+ # test_dataloader = val_dataloader
299
+ test_dataloader = dict(
300
+ batch_size=test_batch_size_per_gpu,
301
+ num_workers=num_workers,
302
+ persistent_workers=True,
303
+ drop_last=False,
304
+ sampler=dict(type='DefaultSampler', shuffle=False),
305
+ dataset=dict(
306
+ type=dataset_type,
307
+ data_root=data_root_test,
308
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
309
+ data_prefix=dict(img='images/'),
310
+ test_mode=True,
311
+ pipeline=test_pipeline,
312
+ metainfo=metainfo,
313
+ )
314
+ )
315
+
316
+ val_evaluator = dict(
317
+ type='CocoMetric',
318
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
319
+ metric='bbox',
320
+ format_only=False)
321
+ # test_evaluator = val_evaluator
322
+ test_evaluator = dict(
323
+ type='CocoMetric',
324
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
325
+ metric='bbox',
326
+ format_only=False
327
+ )
328
+
329
+ optim_wrapper = dict(
330
+ type='AmpOptimWrapper',
331
+ constructor='LayerDecayOptimizerConstructor',
332
+ paramwise_cfg={
333
+ 'decay_rate': 0.7,
334
+ 'decay_type': 'layer_wise',
335
+ 'num_layers': 12,
336
+ },
337
+ optimizer=dict(
338
+ type='AdamW',
339
+ # lr=0.0001,
340
+ # lr=0.01,
341
+ lr=0.001,
342
+ betas=(0.9, 0.999),
343
+ weight_decay=0.1,
344
+ ))
345
+
346
+ # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
347
+ # max_iters = 184375
348
+ # interval = 5000
349
+ max_iters = 100000
350
+
351
+ # interval = 2000
352
+ interval = 1000
353
+
354
+ dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
355
+ param_scheduler = [
356
+ dict(
357
+ type='LinearLR',
358
+ start_factor=0.001,
359
+ by_epoch=False,
360
+ begin=0,
361
+ end=250
362
+ ),
363
+ dict(
364
+ type='MultiStepLR',
365
+ begin=0,
366
+
367
+ end=max_iters,
368
+ # end=max_epochs,
369
+
370
+ by_epoch=False,
371
+ # by_epoch=True,
372
+
373
+ # 88 ep = [163889 iters * 64 images/iter / 118000 images/ep
374
+ # 96 ep = [177546 iters * 64 images/iter / 118000 images/ep
375
+ # milestones=[20, 29],
376
+ # milestones=[5000, 6000],
377
+ milestones=[1000, 2000],
378
+ gamma=0.1
379
+ )
380
+ ]
381
+
382
+ train_cfg = dict(
383
+ type='IterBasedTrainLoop',
384
+ max_iters=max_iters,
385
+ val_interval=interval,
386
+ dynamic_intervals=dynamic_intervals
387
+ )
388
+ # train_cfg = dict(
389
+ # type='EpochBasedTrainLoop',
390
+ # max_epochs=max_epochs,
391
+ # val_interval=1
392
+ # )
393
+
394
+ val_cfg = dict(type='ValLoop')
395
+ test_cfg = dict(type='TestLoop')
396
+
397
+ default_hooks = dict(
398
+ logger=dict(
399
+ type='LoggerHook',
400
+ interval=50,
401
+ log_metric_by_epoch=False
402
+ ),
403
+ checkpoint=dict(
404
+ type='CheckpointHook',
405
+ by_epoch=False,
406
+ # by_epoch=True,
407
+ save_last=True,
408
+ # interval=1,
409
+ interval=interval,
410
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50'],
411
+ max_keep_ckpts=2
412
+ )
413
+ )
414
+
415
+ vis_backends = [
416
+ dict(type='LocalVisBackend'),
417
+ dict(type='TensorboardVisBackend')
418
+ ]
419
+
420
+ visualizer = dict(
421
+ type='DetLocalVisualizer',
422
+ vis_backends=vis_backends,
423
+ name='visualizer'
424
+ )
425
+
426
+ log_processor = dict(
427
+ type='LogProcessor',
428
+ window_size=50,
429
+ by_epoch=False
430
+ # by_epoch=True
431
+ )
432
+
433
+ auto_scale_lr = dict(base_batch_size=64)
434
+
435
+
YOLOv5/best_coco_bbox_mAP_50_epoch_429.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a69bbeb369be3ee42ae83b32ad691c22a9cae5173abb999fc46080b73b19e0
3
+ size 173121983
YOLOv5/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco-ReLINZ-112px_RndSmpl_SmallOnly_BBoxes:42.36px_IoU:0.500_Balanced:0.20.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py'
2
+ deepen_factor = 0.67
3
+ widen_factor = 0.75
4
+
5
+ # TRAIN DATASET
6
+ data_root_train = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/train/'
7
+
8
+ # VAL DATASET
9
+ # data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation/'
10
+ data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation_subset025.0_seed0/'
11
+
12
+ # TEST DATASET
13
+ ## LINZ
14
+ data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
15
+
16
+ ## Utah
17
+ # data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-Utah_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
18
+
19
+
20
+ class_name = ('small',)
21
+ num_classes = len(class_name)
22
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
23
+
24
+ img_scale = (128, 128)
25
+ # img_scale = (112, 112)
26
+
27
+ # Estimated with " python ./tools/analysis_tools/optimize_anchors.py --input-shape 128 128 --augment-args 0.1 1.9 --algorithm v5-k-means configs/..."
28
+ # anchors = [[(25, 32), (53, 69), (159, 220)], [(235, 166), (242, 242), (310, 337)], [(365, 375), (230, 681), (679, 324)]]
29
+ # anchors = [[(157, 155), (239, 133), (136, 238)], [(240, 165), (170, 237), (236, 191)], [(206, 240), (241, 217), (242, 242)]]
30
+ anchors = [[(31, 28), (32, 37), (27, 48)], [(48, 27), (47, 34), (34, 48)], [(41, 48), (49, 41), (48, 48)]]
31
+
32
+ max_epochs = 1000 # 40
33
+ train_batch_size_per_gpu = 200
34
+ validation_batch_size_per_gpu = 100
35
+ test_batch_size_per_gpu = 200 #768 #384
36
+ train_num_workers = 8
37
+
38
+ num_det_layers = 3
39
+
40
+ # Learning rate
41
+ base_lr = 0.01 #0.01
42
+ lr_factor = 0.1
43
+
44
+ load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth'
45
+
46
+ batch_shapes_cfg = dict(
47
+ img_size=img_scale[0],
48
+ batch_size=train_batch_size_per_gpu
49
+ )
50
+
51
+ pre_transform = _base_.pre_transform
52
+ affine_scale = _base_.affine_scale
53
+ mosaic_affine_pipeline = [
54
+ dict(
55
+ type='Mosaic',
56
+ img_scale=img_scale,
57
+ pad_val=114.0,
58
+ pre_transform=pre_transform),
59
+ dict(
60
+ type='YOLOv5RandomAffine',
61
+ max_rotate_degree=0.0,
62
+ max_shear_degree=0.0,
63
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
64
+ # img_scale is (width, height)
65
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
66
+ border_val=(114, 114, 114))
67
+ ]
68
+
69
+ train_pipeline = [
70
+ *pre_transform,
71
+ *mosaic_affine_pipeline,
72
+ dict(
73
+ type='YOLOv5MixUp',
74
+ prob=_base_.mixup_prob,
75
+ pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
76
+ dict(
77
+ type='mmdet.Albu',
78
+ transforms=_base_.albu_train_transforms,
79
+ bbox_params=dict(
80
+ type='BboxParams',
81
+ format='pascal_voc',
82
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
83
+ keymap={
84
+ 'img': 'image',
85
+ 'gt_bboxes': 'bboxes'
86
+ }),
87
+ dict(type='YOLOv5HSVRandomAug'),
88
+ dict(type='mmdet.RandomFlip', prob=0.5),
89
+ dict(
90
+ type='mmdet.PackDetInputs',
91
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
92
+ 'flip_direction'))
93
+ ]
94
+
95
+ _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='YOLOv5KeepRatioResize')].scale = img_scale
96
+ _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='LetterResize')].scale = img_scale
97
+
98
+
99
+ model = dict(
100
+ backbone=dict(
101
+ deepen_factor=deepen_factor,
102
+ widen_factor=widen_factor,
103
+ ),
104
+ neck=dict(
105
+ deepen_factor=deepen_factor,
106
+ widen_factor=widen_factor,
107
+ in_channels=[256, 512, 1024],
108
+ out_channels=[256, 512, 1024],
109
+ num_csp_blocks=3,
110
+ ),
111
+ bbox_head=dict(
112
+ head_module=dict(
113
+ widen_factor=widen_factor,
114
+ num_classes=num_classes,
115
+ featmap_strides=[8, 16, 32],
116
+ in_channels=[256, 512, 1024],
117
+ num_base_priors=3
118
+ ),
119
+ prior_generator=dict(
120
+ base_sizes=anchors,
121
+ strides=[
122
+ 8,
123
+ 16,
124
+ 32,
125
+ ],
126
+ ),
127
+ loss_obj=dict(
128
+ loss_weight=_base_.loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)
129
+ ),
130
+ loss_cls=dict(
131
+ loss_weight=_base_.loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)
132
+ ),
133
+ loss_bbox=dict(
134
+ loss_weight=_base_.loss_bbox_weight * (3 / num_det_layers),
135
+ ),
136
+ obj_level_weights=[
137
+ 4.0,
138
+ 1.0,
139
+ 0.4,
140
+ ],
141
+ ),
142
+ test_cfg=dict(
143
+ nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
144
+ multi_label=False,
145
+ ),
146
+
147
+ )
148
+
149
+
150
+ train_dataloader = dict(
151
+ batch_size=train_batch_size_per_gpu,
152
+ num_workers=train_num_workers,
153
+ dataset=dict(
154
+ _delete_=True,
155
+ type='RepeatDataset',
156
+ times=1,
157
+ dataset=dict(
158
+ type='YOLOv5CocoDataset',
159
+ data_root=data_root_train,
160
+ ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500_BalancedRatio:0.2000.json',
161
+ data_prefix=dict(img='images/'),
162
+ metainfo=metainfo,
163
+ filter_cfg=dict(filter_empty_gt=False),
164
+ pipeline=train_pipeline
165
+ )
166
+ )
167
+ )
168
+
169
+ val_dataloader = dict(
170
+ batch_size=validation_batch_size_per_gpu,
171
+ num_workers=train_num_workers,
172
+ dataset=dict(
173
+ data_root=data_root_val,
174
+ metainfo=metainfo,
175
+ ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
176
+ data_prefix=dict(img='images/'),
177
+ pipeline=_base_.test_pipeline
178
+ )
179
+ )
180
+
181
+ test_dataloader = dict(
182
+ batch_size=test_batch_size_per_gpu,
183
+ num_workers=train_num_workers,
184
+ dataset=dict(
185
+ data_root=data_root_test,
186
+ metainfo=metainfo,
187
+ ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
188
+ data_prefix=dict(img='images/'),
189
+ batch_shapes_cfg=batch_shapes_cfg,
190
+ pipeline=_base_.test_pipeline
191
+ )
192
+ )
193
+
194
+ _base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
195
+ _base_.optim_wrapper.optimizer.lr = base_lr
196
+
197
+ val_evaluator = dict(
198
+ ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
199
+ )
200
+ test_evaluator = dict(
201
+ ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
202
+ )
203
+
204
+
205
+ default_hooks = dict(
206
+ checkpoint=dict(
207
+ interval=1,
208
+ max_keep_ckpts=1,
209
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
210
+ ),
211
+ # The warmup_mim_iter parameter is critical.
212
+ # The default value is 1000 which is not suitable for cat datasets.
213
+ param_scheduler=dict(
214
+ max_epochs=max_epochs,
215
+ warmup_mim_iter=1000,
216
+ lr_factor=lr_factor
217
+ ),
218
+ logger=dict(type='LoggerHook', interval=5))
219
+
220
+ train_cfg = dict(max_epochs=max_epochs, val_interval=1)
221
+ visualizer = dict(vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')])
222
+
YOLOv8/best_coco_bbox_mAP_50_epoch_32.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5f825c0b53ef12637277dc17b6b7ea9a3375bba2c1a4d7d700e83229edae25d
3
+ size 209487703
YOLOv8/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco-ReLINZ-112px-RndSmpl_Imgs:all_Anno:small-only_FakeBBoxes:42.36px_IoU:0.500_Val:ReLINZ.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'

# ========================modified parameters======================
# YOLOv8-m scaling factors relative to the YOLOv8-s base config.
deepen_factor = 0.67
widen_factor = 0.75
last_stage_out_channels = 768

affine_scale = 0.9  # random-affine scaling range becomes (1-0.9, 1+0.9)
mixup_prob = 0.1    # probability of applying YOLOv5MixUp in stage 1


img_scale = (128, 128)  # _base_.img_scale
# img_scale = (640, 640)  # _base_.img_scale

# Single-class setup: only 'small' vehicles are annotated.
# (Fix: removed a dead `num_classes = 1` assignment that was immediately
# overwritten by the len(class_name) computation below.)
class_name = ('small',)
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

train_batch_size_per_gpu = 512
val_batch_size_per_gpu = 128
test_batch_size_per_gpu = 128

train_num_workers = 16
val_num_workers = 16
test_num_workers = 16

# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
base_lr = 0.001
lr_factor = 0.01   # Learning rate scaling factor
max_epochs = 1000  # Maximum training epochs

# Disable mosaic augmentation for final 10 epochs (stage 2)
close_mosaic_epochs = 10

save_epoch_intervals = 1  # checkpoint/validation interval (epochs)
max_keep_ckpts = 2        # number of periodic checkpoints to retain

# validation intervals in stage 2
val_interval_stage2 = 1

# TRAIN DATASET
data_root_train = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/train/'
ann_file_train = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'

# VAL DATASET
# data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation/'
data_root_val = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/validation_subset025.0_seed0/'
ann_file_val = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'

# TEST DATASET
## LINZ
data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-LINZ_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
ann_file_test = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'

## Utah
# data_root_test = '/var/storage/Common/SatelliteVehicles/Datasets/Real/Real-Utah_112px_0.125m_RndSmpl_Imgs:all_Anno:small-only/test/'
# ann_file_test = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'


# COCO-pretrained YOLOv8-m weights used to initialize fine-tuning.
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth'
63
+
64
# =======================Unmodified in most cases==================
# Shared pieces from the base config (presumably image/annotation loading and
# the final augment+pack transforms — confirm against the base file).
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform

# Rescale the s-model architecture to YOLOv8-m and the single-class head.
model = dict(
    backbone=dict(
        last_stage_out_channels=last_stage_out_channels,
        deepen_factor=deepen_factor,
        widen_factor=widen_factor
    ),
    neck=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=[256, 512, last_stage_out_channels],
        out_channels=[256, 512, last_stage_out_channels]
    ),
    bbox_head=dict(
        head_module=dict(
            num_classes=num_classes,
            widen_factor=widen_factor,
            in_channels=[256, 512, last_stage_out_channels])
    ),
    train_cfg=dict(
        assigner=dict(
            num_classes=num_classes
        )
    )
)

# Mosaic + random affine; reused both in the main stage-1 pipeline and as the
# pre-transform for MixUp below.
mosaic_affine_transform = [
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,   # no rotation/shear for overhead imagery
        max_shear_degree=0.0,
        max_aspect_ratio=100,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114))
]

# enable mixup
train_pipeline = [
    *pre_transform, *mosaic_affine_transform,
    dict(
        type='YOLOv5MixUp',
        prob=mixup_prob,
        pre_transform=[*pre_transform, *mosaic_affine_transform]),
    *last_transform
]
119
+
120
# Stage-2 pipeline: used once mosaic is switched off for the last
# close_mosaic_epochs epochs — keep-ratio resize + letterbox + mild affine.
train_pipeline_stage2 = [
    *pre_transform,
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=True,
        pad_val=dict(img=114.0)
    ),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        max_aspect_ratio=100,
        border_val=(114, 114, 114)
    ),
    *last_transform
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root_train,
        ann_file=data_root_train+ann_file_train,
        data_prefix=dict(img='images/'),
        # Keep images without ground-truth boxes (background-only tiles).
        filter_cfg=dict(filter_empty_gt=False),
        metainfo=metainfo,
        pipeline=train_pipeline
    )
)
152
+
153
# _base_.test_pipeline[1].img_scale = img_scale
# _base_.test_pipeline[2].scale = img_scale

# Deterministic eval pipeline: keep-ratio resize + letterbox (no scale-up),
# then load GT boxes and pack in mmdet format.
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=False,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]

val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    dataset=dict(
        data_root=data_root_val,
        ann_file=data_root_val+ann_file_val,
        data_prefix=dict(img='images/'),
        metainfo=metainfo,
        # filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
        # NOTE(review): val drops empty-GT images while test (below) keeps
        # them — confirm this asymmetry is intentional and whether filter_cfg
        # applies at all in test_mode.
        filter_cfg=dict(filter_empty_gt=True), # Does this make a change?
        pipeline=test_pipeline,
    )
)

test_dataloader = dict(
    batch_size=test_batch_size_per_gpu,
    num_workers=test_num_workers,
    dataset=dict(
        data_root=data_root_test,
        ann_file=data_root_test+ann_file_test,
        data_prefix=dict(img='images/'),
        metainfo=metainfo,
        filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
        pipeline=test_pipeline,
    )
)
197
+
198
+
199
# Adapt the base optimizer to this run's learning rate and batch size.
optim_wrapper = dict(
    optimizer=dict(
        lr=base_lr,
        # Consumed by MMYOLO's YOLOv5 optimizer constructor (presumably to
        # rescale regularization with batch size — confirm in base config).
        batch_size_per_gpu=train_batch_size_per_gpu
    ),
)


default_hooks = dict(
    param_scheduler=dict(
        lr_factor=lr_factor,
        max_epochs=max_epochs
    ),
    checkpoint=dict(
        interval=save_epoch_intervals,  # save every epoch
        max_keep_ckpts=max_keep_ckpts,  # retain at most 2 periodic ckpts
        # Additionally track best checkpoints by these COCO metrics.
        save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
    )
)

# custom_hooks[1] is assumed to be the two-stage pipeline-switch hook from the
# base config — TODO confirm its index. Switch to the mosaic-free pipeline
# for the final close_mosaic_epochs epochs.
_base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2

# Evaluators must point at the same annotation files as the dataloaders.
val_evaluator = dict(
    ann_file=data_root_val + ann_file_val,
)

test_evaluator = dict(
    ann_file= data_root_test + ann_file_test,
)

train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=save_epoch_intervals,
    # From (max_epochs - close_mosaic_epochs) on, validate every
    # val_interval_stage2 epochs.
    dynamic_intervals=[
        ((max_epochs - close_mosaic_epochs),
         val_interval_stage2)
    ]
)


# Write visualizations/metrics both locally and to TensorBoard.
visualizer = dict(
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(type='TensorboardVisBackend')
    ]
)