FasterRCNN/configs/faster_rcnn/faster-rcnn.py DELETED
@@ -1,407 +0,0 @@
1
- _base_ = [
2
- '../_base_/models/faster-rcnn_r50_fpn.py',
3
- '../_base_/datasets/coco_detection.py',
4
- '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
5
- ]
6
-
7
-
8
-
9
- # TRAIN DATASET
10
- data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
11
-
12
- # VAL DATASET
13
- data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
14
-
15
- # TEST DATASET
16
- ## LINZ
17
- data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
18
-
19
-
20
- max_epochs = 1000 # 40
21
- train_batch_size_per_gpu = 64
22
- validation_batch_size_per_gpu = 64
23
- test_batch_size_per_gpu = 64
24
- num_workers = 8
25
-
26
-
27
- class_name = ('small',)
28
- num_classes = len(class_name)
29
- metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
30
-
31
- img_scale = (128, 128)
32
-
33
- affine_scale = 0.9
34
-
35
- load_from = 'https://download.openxlab.org.cn/models/mmdetection/FasterR-CNN/weight/faster-rcnn_r50_fpn_2x_coco'
36
-
37
-
38
- # model settings
39
- model = dict(
40
- type='FasterRCNN',
41
- data_preprocessor=dict(
42
- type='DetDataPreprocessor',
43
- mean=[123.675, 116.28, 103.53],
44
- std=[58.395, 57.12, 57.375],
45
- bgr_to_rgb=True,
46
- pad_size_divisor=32),
47
- backbone=dict(
48
- type='ResNet',
49
- depth=50,
50
- num_stages=4,
51
- out_indices=(0, 1, 2, 3),
52
- frozen_stages=1,
53
- norm_cfg=dict(type='BN', requires_grad=True),
54
- norm_eval=True,
55
- style='pytorch',
56
- init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
57
- neck=dict(
58
- type='FPN',
59
- in_channels=[256, 512, 1024, 2048],
60
- out_channels=256,
61
- num_outs=5),
62
- rpn_head=dict(
63
- type='RPNHead',
64
- in_channels=256,
65
- feat_channels=256,
66
- anchor_generator=dict(
67
- type='AnchorGenerator',
68
- scales=[8],
69
- ratios=[0.5, 1.0, 2.0],
70
- strides=[4, 8, 16, 32, 64]),
71
- bbox_coder=dict(
72
- type='DeltaXYWHBBoxCoder',
73
- target_means=[.0, .0, .0, .0],
74
- target_stds=[1.0, 1.0, 1.0, 1.0]),
75
- loss_cls=dict(
76
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
77
- loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
78
- roi_head=dict(
79
- type='StandardRoIHead',
80
- bbox_roi_extractor=dict(
81
- type='SingleRoIExtractor',
82
- roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
83
- out_channels=256,
84
- featmap_strides=[4, 8, 16, 32]),
85
- bbox_head=dict(
86
- type='Shared2FCBBoxHead',
87
- in_channels=256,
88
- fc_out_channels=1024,
89
- roi_feat_size=7,
90
- num_classes=num_classes,
91
- bbox_coder=dict(
92
- type='DeltaXYWHBBoxCoder',
93
- target_means=[0., 0., 0., 0.],
94
- target_stds=[0.1, 0.1, 0.2, 0.2]),
95
- reg_class_agnostic=False,
96
- loss_cls=dict(
97
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
98
- loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
99
- # model training and testing settings
100
- train_cfg=dict(
101
- rpn=dict(
102
- assigner=dict(
103
- type='MaxIoUAssigner',
104
- pos_iou_thr=0.7,
105
- neg_iou_thr=0.3,
106
- min_pos_iou=0.3,
107
- match_low_quality=True,
108
- ignore_iof_thr=-1),
109
- sampler=dict(
110
- type='RandomSampler',
111
- num=256,
112
- pos_fraction=0.5,
113
- neg_pos_ub=-1,
114
- add_gt_as_proposals=False),
115
- allowed_border=-1,
116
- pos_weight=-1,
117
- debug=False),
118
- rpn_proposal=dict(
119
- nms_pre=2000,
120
- max_per_img=1000,
121
- nms=dict(type='nms', iou_threshold=0.7),
122
- min_bbox_size=0),
123
- rcnn=dict(
124
- assigner=dict(
125
- type='MaxIoUAssigner',
126
- pos_iou_thr=0.5,
127
- neg_iou_thr=0.5,
128
- min_pos_iou=0.5,
129
- match_low_quality=False,
130
- ignore_iof_thr=-1),
131
- sampler=dict(
132
- type='RandomSampler',
133
- num=512,
134
- pos_fraction=0.25,
135
- neg_pos_ub=-1,
136
- add_gt_as_proposals=True),
137
- pos_weight=-1,
138
- debug=False)),
139
- test_cfg=dict(
140
- rpn=dict(
141
- nms_pre=1000,
142
- max_per_img=1000,
143
- nms=dict(type='nms', iou_threshold=0.7),
144
- min_bbox_size=0),
145
- rcnn=dict(
146
- score_thr=0.05,
147
- nms=dict(type='nms', iou_threshold=0.5),
148
- max_per_img=100)
149
- # soft-nms is also supported for rcnn testing
150
- # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
151
- ))
152
-
153
-
154
-
155
- dataset_type = 'CocoDataset'
156
-
157
- backend_args = None
158
-
159
- # Original
160
- # train_pipeline = [
161
- # dict(type='LoadImageFromFile', backend_args=backend_args),
162
- # dict(type='LoadAnnotations', with_bbox=True),
163
- # dict(type='Resize', scale=img_scale, keep_ratio=True),
164
- # dict(type='RandomFlip', prob=0.5),
165
- # dict(type='PackDetInputs')
166
- # ]
167
-
168
- pre_transform = [
169
- dict(type='LoadImageFromFile', backend_args=backend_args),
170
- dict(type='LoadAnnotations', with_bbox=True)
171
- ]
172
-
173
- albu_train_transforms = [
174
- dict(type='Blur', p=0.01),
175
- dict(type='MedianBlur', p=0.01),
176
- dict(type='ToGray', p=0.01),
177
- dict(type='CLAHE', p=0.01)
178
- ]
179
-
180
- last_transform = [
181
- dict(
182
- type='Albu',
183
- transforms=albu_train_transforms,
184
- bbox_params=dict(
185
- type='BboxParams',
186
- format='pascal_voc',
187
- label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
188
- keymap={
189
- 'img': 'image',
190
- 'gt_bboxes': 'bboxes'
191
- }),
192
- dict(type='YOLOXHSVRandomAug'), # ???
193
- dict(type='RandomFlip', prob=0.5),
194
- dict(
195
- type='PackDetInputs',
196
- meta_keys=(
197
- 'img_id',
198
- 'img_path',
199
- 'ori_shape',
200
- 'img_shape',
201
- 'flip',
202
- 'flip_direction'
203
- )
204
- )
205
- ]
206
-
207
- mosaic_affine_transform = [
208
- dict(
209
- type='Mosaic',
210
- img_scale=img_scale,
211
- pad_val=114.0,
212
- ),
213
- dict(
214
- type='RandomAffine',
215
- max_rotate_degree=0.0,
216
- max_shear_degree=0.0,
217
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
218
- # img_scale is (width, height)
219
- border=(-img_scale[0] // 2, -img_scale[1] // 2),
220
- border_val=(114, 114, 114))
221
- ]
222
-
223
- train_pipeline = [
224
- *pre_transform,
225
- *mosaic_affine_transform,
226
- dict(
227
- type='MixUp',
228
- img_scale=img_scale,
229
- ),
230
- *last_transform
231
- ]
232
-
233
-
234
- # Original
235
- # train_dataloader = dict(
236
- # batch_size=train_batch_size_per_gpu,
237
- # num_workers=num_workers,
238
- # persistent_workers=True,
239
- # sampler=dict(type='DefaultSampler', shuffle=True),
240
- # batch_sampler=dict(type='AspectRatioBatchSampler'),
241
- # dataset=dict(
242
- # type=dataset_type,
243
- # data_root=data_root_train,
244
- # ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500_BalancedRatio:0.2000.json',
245
- # data_prefix=dict(img='images/'),
246
- # filter_cfg=dict(filter_empty_gt=False, min_size=32),
247
- # pipeline=train_pipeline,
248
- # metainfo=metainfo,
249
- # backend_args=backend_args
250
- # )
251
- # )
252
-
253
- train_dataloader = dict(
254
- batch_size=train_batch_size_per_gpu,
255
- num_workers=num_workers,
256
- persistent_workers=True,
257
- sampler=dict(type='DefaultSampler', shuffle=True),
258
- batch_sampler=dict(type='AspectRatioBatchSampler'),
259
- dataset=dict(
260
- _delete_=True,
261
- type='MultiImageMixDataset',
262
- dataset=dict(
263
- type=dataset_type,
264
- data_root=data_root_train,
265
- ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
266
- data_prefix=dict(img='images/'),
267
- filter_cfg=dict(filter_empty_gt=False, min_size=32),
268
- metainfo=metainfo,
269
- backend_args=backend_args,
270
- pipeline=pre_transform
271
- ),
272
- pipeline=train_pipeline,
273
- )
274
- )
275
-
276
-
277
-
278
- test_pipeline = [
279
- dict(type='LoadImageFromFile', backend_args=backend_args),
280
- dict(type='Resize', scale=img_scale, keep_ratio=True),
281
- # If you don't have a gt annotation, delete the pipeline
282
- dict(type='LoadAnnotations', with_bbox=True),
283
- dict(
284
- type='PackDetInputs',
285
- meta_keys=(
286
- 'img_id', 'img_path', 'ori_shape', 'img_shape',
287
- 'scale_factor'
288
- )
289
- )
290
- ]
291
-
292
-
293
- val_dataloader = dict(
294
- batch_size=validation_batch_size_per_gpu,
295
- num_workers=num_workers,
296
- persistent_workers=True,
297
- drop_last=False,
298
- sampler=dict(type='DefaultSampler', shuffle=False),
299
- dataset=dict(
300
- type=dataset_type,
301
- data_root=data_root_val,
302
- ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
303
- data_prefix=dict(img='images/'),
304
- filter_cfg=dict(filter_empty_gt=False, min_size=32),
305
- test_mode=True,
306
- pipeline=test_pipeline,
307
- metainfo=metainfo,
308
- backend_args=backend_args
309
- )
310
- )
311
-
312
- test_dataloader = dict(
313
- batch_size=test_batch_size_per_gpu,
314
- num_workers=num_workers,
315
- persistent_workers=True,
316
- drop_last=False,
317
- sampler=dict(type='DefaultSampler', shuffle=False),
318
- dataset=dict(
319
- type=dataset_type,
320
- data_root=data_root_test,
321
- ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
322
- data_prefix=dict(img='images/'),
323
- filter_cfg=dict(filter_empty_gt=False, min_size=32),
324
- test_mode=True,
325
- pipeline=test_pipeline,
326
- metainfo=metainfo,
327
- backend_args=backend_args
328
- )
329
- )
330
-
331
- # test_dataloader = val_dataloader
332
-
333
- val_evaluator = dict(
334
- type='CocoMetric',
335
- ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
336
- metric='bbox',
337
- format_only=False,
338
- backend_args=backend_args
339
- )
340
-
341
- test_evaluator = dict(
342
- type='CocoMetric',
343
- ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
344
- metric='bbox',
345
- format_only=False,
346
- backend_args=backend_args
347
- )
348
-
349
- # test_evaluator = val_evaluator
350
-
351
-
352
-
353
- # training schedule for 2x
354
- train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
355
- val_cfg = dict(type='ValLoop')
356
- test_cfg = dict(type='TestLoop')
357
-
358
- # learning rate
359
- param_scheduler = [
360
- dict(
361
- type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
362
- dict(
363
- type='MultiStepLR',
364
- begin=0,
365
- end=max_epochs,
366
- by_epoch=True,
367
- milestones=[16, 22],
368
- gamma=0.1)
369
- ]
370
-
371
- # optimizer
372
- optim_wrapper = dict(
373
- type='OptimWrapper',
374
- optimizer=dict(
375
- type='SGD',
376
- lr=0.2,
377
- momentum=0.9,
378
- weight_decay=0.0001
379
- )
380
- )
381
-
382
- # Default setting for scaling LR automatically
383
- # - `enable` means enable scaling LR automatically
384
- # or not by default.
385
- # - `base_batch_size` = (8 GPUs) x (2 samples per GPU)
386
- auto_scale_lr = dict(enable=False, base_batch_size=train_batch_size_per_gpu)
387
-
388
-
389
- default_hooks = dict(
390
- checkpoint=dict(
391
- interval=1,
392
- max_keep_ckpts=1,
393
- save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
394
- ),
395
- # The warmup_mim_iter parameter is critical.
396
- # The default value is 1000 which is not suitable for cat datasets.
397
- # param_scheduler=dict(
398
- # max_epochs=max_epochs,
399
- # warmup_mim_iter=1000,
400
- # lr_factor=lr_factor
401
- # ),
402
- logger=dict(type='LoggerHook', interval=5))
403
-
404
- vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]
405
- visualizer = dict(
406
- type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
407
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -3,16 +3,4 @@ license: cc-by-nc-4.0
3
  language:
4
  - en
5
  pipeline_tag: object-detection
6
- library_name: mmdetection
7
- ---
8
- ## Introduction
9
- We introduce a real-world aerial view dataset, LINZ, captured in Selwyn (New Zealand). The dataset has a ground sampling distance (GSD) of 12.5 cm per px and has been sampled to an image size of 112 px × 112 px. For data annotation, we label only the small vehicle centers. To leverage the abundance of bounding box-based open-source object detection frameworks, we define a fixed-size ground truth bounding box of 42.36 px × 42.36 px centered at each vehicle. Annotations are provided in COCO format [x, y, w, h], where "small" in the annotation json files denotes the small vehicle class and (x, y) denotes the top-left corner of the bounding box. We use AP50 as the evaluation metric.
10
-
11
- ## Model Usage
12
- This folder contains four detectors trained on Real LINZ data and tested on Real LINZ data, along with configuration files we use for training and testing.
13
-
14
- ## References
15
-
16
- ➡️ **Paper:** [Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision](https://arxiv.org/abs/2507.20976)
17
- ➡️ **Project Page:** [Webpage](https://humansensinglab.github.io/AGenDA/)
18
- ➡️ **Data:** [AGenDA](https://github.com/humansensinglab/AGenDA/tree/main/Data)
 
3
  language:
4
  - en
5
  pipeline_tag: object-detection
6
+ ---
 
 
 
 
 
 
 
 
 
 
 
 
ViTDet/projects/ViTDet/configs/vitdet.py DELETED
@@ -1,432 +0,0 @@
1
- _base_ = [
2
- '../../../configs/_base_/default_runtime.py',
3
- '../../../configs/_base_/models/mask-rcnn_r50_fpn.py',
4
- ]
5
-
6
- custom_imports = dict(imports=['projects.ViTDet.vitdet'])
7
-
8
-
9
- ## TRAIN DATASET
10
- data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
11
-
12
-
13
- ## VALIDATION DATASET
14
- data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
15
-
16
- # TEST DATASET
17
- ## LINZ
18
- data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
19
-
20
-
21
- train_batch_size_per_gpu = 24
22
- val_batch_size_per_gpu = 12
23
- test_batch_size_per_gpu = 60
24
-
25
- num_workers = 8
26
-
27
- max_epochs = 100
28
-
29
-
30
- # img_scale = (1024, 1024)
31
- # img_scale = (384, 384)
32
- img_scale = (128, 128)
33
-
34
- affine_scale = 0.9
35
-
36
- class_name = ('small',)
37
- num_classes = len(class_name)
38
- metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
39
-
40
-
41
-
42
- load_from = 'https://download.openmmlab.com/mmdetection/v3.0/vitdet/vitdet_mask-rcnn_vit-b-mae_lsj-100e/vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
43
-
44
-
45
- # MODEL SETTINGS
46
- backbone_norm_cfg = dict(type='LN', requires_grad=True)
47
- norm_cfg = dict(type='LN2d', requires_grad=True)
48
-
49
- batch_augments = [
50
- dict(type='BatchFixedSizePad', size=img_scale, pad_mask=True)
51
- ]
52
-
53
- model = dict(
54
- data_preprocessor=dict(pad_size_divisor=32, batch_augments=batch_augments),
55
- backbone=dict(
56
- _delete_=True,
57
- type='ViT',
58
- # img_size=1024,
59
- # img_size=384,
60
- img_size=img_scale[0],
61
- patch_size=16,
62
- embed_dim=768,
63
- depth=12,
64
- num_heads=12,
65
- drop_path_rate=0.1,
66
- window_size=14,
67
- mlp_ratio=4,
68
- qkv_bias=True,
69
- norm_cfg=backbone_norm_cfg,
70
- window_block_indexes=[
71
- 0,
72
- 1,
73
- 3,
74
- 4,
75
- 6,
76
- 7,
77
- 9,
78
- 10,
79
- ],
80
- use_rel_pos=True,
81
- init_cfg=dict(
82
- type='Pretrained',
83
- # checkpoint='mae_pretrain_vit_base.pth'
84
- # checkpoint='detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth'
85
- checkpoint='vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
86
- )
87
- ),
88
- neck=dict(
89
- _delete_=True,
90
- type='SimpleFPN',
91
- backbone_channel=768,
92
- in_channels=[192, 384, 768, 768],
93
- out_channels=256,
94
- num_outs=5,
95
- norm_cfg=norm_cfg),
96
- rpn_head=dict(num_convs=2),
97
- roi_head=dict(
98
- bbox_head=dict(
99
- type='Shared4Conv1FCBBoxHead',
100
- conv_out_channels=256,
101
- norm_cfg=norm_cfg,
102
- num_classes=num_classes
103
- ),
104
- # mask_head=dict( # No masks as used
105
- # norm_cfg=norm_cfg,
106
- # num_classes=1,
107
- # loss_mask=dict(
108
- # use_mask=False
109
- # ),
110
- # )
111
- mask_head=None
112
- )
113
- )
114
-
115
- custom_hooks = [dict(type='Fp16CompresssionHook')]
116
-
117
-
118
-
119
-
120
-
121
-
122
-
123
-
124
- ##
125
- dataset_type = 'CocoDataset'
126
- backend_args = None
127
-
128
- # Original
129
- # train_pipeline = [
130
- # dict(type='LoadImageFromFile', backend_args=backend_args),
131
- # dict(
132
- # type='LoadAnnotations',
133
- # with_bbox=True,
134
- # # with_mask=True
135
- # with_mask=False
136
- # ),
137
- # dict(type='RandomFlip', prob=0.5),
138
- # dict(
139
- # type='RandomResize',
140
- # scale=img_scale,
141
- # ratio_range=(0.1, 2.0),
142
- # keep_ratio=True),
143
- # dict(
144
- # type='RandomCrop',
145
- # crop_type='absolute_range',
146
- # crop_size=img_scale,
147
- # recompute_bbox=True,
148
- # allow_negative_crop=True),
149
- # dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
150
- # dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
151
- # dict(type='PackDetInputs')
152
- # ]
153
-
154
- pre_transform = [
155
- dict(type='LoadImageFromFile', backend_args=backend_args),
156
- dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
157
- ]
158
-
159
- albu_train_transforms = [
160
- dict(type='Blur', p=0.01),
161
- dict(type='MedianBlur', p=0.01),
162
- dict(type='ToGray', p=0.01),
163
- dict(type='CLAHE', p=0.01)
164
- ]
165
-
166
- last_transform = [
167
- dict(
168
- type='Albu',
169
- transforms=albu_train_transforms,
170
- bbox_params=dict(
171
- type='BboxParams',
172
- format='pascal_voc',
173
- label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
174
- keymap={
175
- 'img': 'image',
176
- 'gt_bboxes': 'bboxes'
177
- }),
178
- dict(type='YOLOXHSVRandomAug'), # ???
179
- dict(type='RandomFlip', prob=0.5),
180
- dict(
181
- type='PackDetInputs',
182
- meta_keys=(
183
- 'img_id',
184
- 'img_path',
185
- 'ori_shape',
186
- 'img_shape',
187
- 'flip',
188
- 'flip_direction'
189
- )
190
- )
191
- ]
192
-
193
- mosaic_affine_transform = [
194
- dict(
195
- type='Mosaic',
196
- img_scale=img_scale,
197
- pad_val=114.0,
198
- ),
199
- dict(
200
- type='RandomAffine',
201
- max_rotate_degree=0.0,
202
- max_shear_degree=0.0,
203
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
204
- # img_scale is (width, height)
205
- border=(-img_scale[0] // 2, -img_scale[1] // 2),
206
- border_val=(114, 114, 114))
207
- ]
208
-
209
- train_pipeline = [
210
- *pre_transform,
211
- *mosaic_affine_transform,
212
- dict(
213
- type='MixUp',
214
- img_scale=img_scale,
215
- ),
216
- *last_transform
217
- ]
218
-
219
-
220
-
221
- # Original
222
- # train_dataloader = dict(
223
- # batch_size=train_batch_size_per_gpu,
224
- # num_workers=num_workers,
225
- # persistent_workers=True,
226
- # sampler=dict(type='DefaultSampler', shuffle=True),
227
- # dataset=dict(
228
- # type=dataset_type,
229
- # data_root=data_root_train,
230
- # ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
231
- # data_prefix=dict(img='images/'),
232
- # # filter_cfg=dict(filter_empty_gt=True, min_size=32),
233
- # filter_cfg=dict(filter_empty_gt=False),
234
- # pipeline=train_pipeline,
235
- # metainfo=metainfo,
236
- # )
237
- # )
238
-
239
- train_dataloader = dict(
240
- batch_size=train_batch_size_per_gpu,
241
- num_workers=num_workers,
242
- persistent_workers=True,
243
- sampler=dict(type='DefaultSampler', shuffle=True),
244
- batch_sampler=dict(type='AspectRatioBatchSampler'),
245
- dataset=dict(
246
- # _delete_=True,
247
- type='MultiImageMixDataset',
248
- dataset=dict(
249
- type=dataset_type,
250
- data_root=data_root_train,
251
- ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
252
- data_prefix=dict(img='images/'),
253
- filter_cfg=dict(filter_empty_gt=False, min_size=32),
254
- metainfo=metainfo,
255
- backend_args=backend_args,
256
- pipeline=pre_transform
257
- ),
258
- pipeline=train_pipeline,
259
- )
260
- )
261
-
262
-
263
- test_pipeline = [
264
- dict(type='LoadImageFromFile', backend_args=backend_args),
265
- dict(type='Resize', scale=img_scale, keep_ratio=True),
266
- dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
267
- dict(
268
- type='LoadAnnotations',
269
- with_bbox=True,
270
- # with_mask=True
271
- with_mask=False
272
- ),
273
- dict(
274
- type='PackDetInputs',
275
- meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
276
- 'scale_factor'))
277
- ]
278
-
279
- val_dataloader = dict(
280
- batch_size=val_batch_size_per_gpu,
281
- num_workers=num_workers,
282
- persistent_workers=True,
283
- drop_last=False,
284
- sampler=dict(type='DefaultSampler', shuffle=False),
285
- dataset=dict(
286
- type=dataset_type,
287
- data_root=data_root_val,
288
- ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
289
- data_prefix=dict(img='images/'),
290
- test_mode=True,
291
- pipeline=test_pipeline,
292
- metainfo=metainfo,
293
- )
294
- )
295
- # test_dataloader = val_dataloader
296
- test_dataloader = dict(
297
- batch_size=test_batch_size_per_gpu,
298
- num_workers=num_workers,
299
- persistent_workers=True,
300
- drop_last=False,
301
- sampler=dict(type='DefaultSampler', shuffle=False),
302
- dataset=dict(
303
- type=dataset_type,
304
- data_root=data_root_test,
305
- ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
306
- data_prefix=dict(img='images/'),
307
- test_mode=True,
308
- pipeline=test_pipeline,
309
- metainfo=metainfo,
310
- )
311
- )
312
-
313
- val_evaluator = dict(
314
- type='CocoMetric',
315
- ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
316
- metric='bbox',
317
- format_only=False)
318
- # test_evaluator = val_evaluator
319
- test_evaluator = dict(
320
- type='CocoMetric',
321
- ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
322
- metric='bbox',
323
- format_only=False
324
- )
325
-
326
- optim_wrapper = dict(
327
- type='AmpOptimWrapper',
328
- constructor='LayerDecayOptimizerConstructor',
329
- paramwise_cfg={
330
- 'decay_rate': 0.7,
331
- 'decay_type': 'layer_wise',
332
- 'num_layers': 12,
333
- },
334
- optimizer=dict(
335
- type='AdamW',
336
- # lr=0.0001,
337
- # lr=0.01,
338
- lr=0.001,
339
- betas=(0.9, 0.999),
340
- weight_decay=0.1,
341
- ))
342
-
343
- # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
344
- # max_iters = 184375
345
- # interval = 5000
346
- max_iters = 100000
347
-
348
- # interval = 2000
349
- interval = 1000
350
-
351
- dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
352
- param_scheduler = [
353
- dict(
354
- type='LinearLR',
355
- start_factor=0.001,
356
- by_epoch=False,
357
- begin=0,
358
- end=250
359
- ),
360
- dict(
361
- type='MultiStepLR',
362
- begin=0,
363
-
364
- end=max_iters,
365
- # end=max_epochs,
366
-
367
- by_epoch=False,
368
- # by_epoch=True,
369
-
370
- # 88 ep = [163889 iters * 64 images/iter / 118000 images/ep
371
- # 96 ep = [177546 iters * 64 images/iter / 118000 images/ep
372
- # milestones=[20, 29],
373
- # milestones=[5000, 6000],
374
- milestones=[1000, 2000],
375
- gamma=0.1
376
- )
377
- ]
378
-
379
- train_cfg = dict(
380
- type='IterBasedTrainLoop',
381
- max_iters=max_iters,
382
- val_interval=interval,
383
- dynamic_intervals=dynamic_intervals
384
- )
385
- # train_cfg = dict(
386
- # type='EpochBasedTrainLoop',
387
- # max_epochs=max_epochs,
388
- # val_interval=1
389
- # )
390
-
391
- val_cfg = dict(type='ValLoop')
392
- test_cfg = dict(type='TestLoop')
393
-
394
- default_hooks = dict(
395
- logger=dict(
396
- type='LoggerHook',
397
- interval=50,
398
- log_metric_by_epoch=False
399
- ),
400
- checkpoint=dict(
401
- type='CheckpointHook',
402
- by_epoch=False,
403
- # by_epoch=True,
404
- save_last=True,
405
- # interval=1,
406
- interval=interval,
407
- save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50'],
408
- max_keep_ckpts=2
409
- )
410
- )
411
-
412
- vis_backends = [
413
- dict(type='LocalVisBackend'),
414
- dict(type='TensorboardVisBackend')
415
- ]
416
-
417
- visualizer = dict(
418
- type='DetLocalVisualizer',
419
- vis_backends=vis_backends,
420
- name='visualizer'
421
- )
422
-
423
- log_processor = dict(
424
- type='LogProcessor',
425
- window_size=50,
426
- by_epoch=False
427
- # by_epoch=True
428
- )
429
-
430
- auto_scale_lr = dict(base_batch_size=64)
431
-
432
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
YOLOv5/configs/yolov5/yolov5.py DELETED
@@ -1,218 +0,0 @@
1
- _base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py'
2
- deepen_factor = 0.67
3
- widen_factor = 0.75
4
-
5
- # TRAIN DATASET
6
- data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
7
-
8
- # VAL DATASET
9
- data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
10
-
11
- # TEST DATASET
12
- ## LINZ
13
- data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
14
-
15
-
16
- class_name = ('small',)
17
- num_classes = len(class_name)
18
- metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
19
-
20
- img_scale = (128, 128)
21
- # img_scale = (112, 112)
22
-
23
- # Estimated with " python ./tools/analysis_tools/optimize_anchors.py --input-shape 128 128 --augment-args 0.1 1.9 --algorithm v5-k-means configs/..."
24
- # anchors = [[(25, 32), (53, 69), (159, 220)], [(235, 166), (242, 242), (310, 337)], [(365, 375), (230, 681), (679, 324)]]
25
- # anchors = [[(157, 155), (239, 133), (136, 238)], [(240, 165), (170, 237), (236, 191)], [(206, 240), (241, 217), (242, 242)]]
26
- anchors = [[(31, 28), (32, 37), (27, 48)], [(48, 27), (47, 34), (34, 48)], [(41, 48), (49, 41), (48, 48)]]
27
-
28
- max_epochs = 1000 # 40
29
- train_batch_size_per_gpu = 200
30
- validation_batch_size_per_gpu = 100
31
- test_batch_size_per_gpu = 200 #768 #384
32
- train_num_workers = 8
33
-
34
- num_det_layers = 3
35
-
36
- # Learning rate
37
- base_lr = 0.01 #0.01
38
- lr_factor = 0.1
39
-
40
- load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth'
41
-
42
- batch_shapes_cfg = dict(
43
- img_size=img_scale[0],
44
- batch_size=train_batch_size_per_gpu
45
- )
46
-
47
- pre_transform = _base_.pre_transform
48
- affine_scale = _base_.affine_scale
49
- mosaic_affine_pipeline = [
50
- dict(
51
- type='Mosaic',
52
- img_scale=img_scale,
53
- pad_val=114.0,
54
- pre_transform=pre_transform),
55
- dict(
56
- type='YOLOv5RandomAffine',
57
- max_rotate_degree=0.0,
58
- max_shear_degree=0.0,
59
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
60
- # img_scale is (width, height)
61
- border=(-img_scale[0] // 2, -img_scale[1] // 2),
62
- border_val=(114, 114, 114))
63
- ]
64
-
65
- train_pipeline = [
66
- *pre_transform,
67
- *mosaic_affine_pipeline,
68
- dict(
69
- type='YOLOv5MixUp',
70
- prob=_base_.mixup_prob,
71
- pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
72
- dict(
73
- type='mmdet.Albu',
74
- transforms=_base_.albu_train_transforms,
75
- bbox_params=dict(
76
- type='BboxParams',
77
- format='pascal_voc',
78
- label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
79
- keymap={
80
- 'img': 'image',
81
- 'gt_bboxes': 'bboxes'
82
- }),
83
- dict(type='YOLOv5HSVRandomAug'),
84
- dict(type='mmdet.RandomFlip', prob=0.5),
85
- dict(
86
- type='mmdet.PackDetInputs',
87
- meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
88
- 'flip_direction'))
89
- ]
90
-
91
- _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='YOLOv5KeepRatioResize')].scale = img_scale
92
- _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='LetterResize')].scale = img_scale
93
-
94
-
95
- model = dict(
96
- backbone=dict(
97
- deepen_factor=deepen_factor,
98
- widen_factor=widen_factor,
99
- ),
100
- neck=dict(
101
- deepen_factor=deepen_factor,
102
- widen_factor=widen_factor,
103
- in_channels=[256, 512, 1024],
104
- out_channels=[256, 512, 1024],
105
- num_csp_blocks=3,
106
- ),
107
- bbox_head=dict(
108
- head_module=dict(
109
- widen_factor=widen_factor,
110
- num_classes=num_classes,
111
- featmap_strides=[8, 16, 32],
112
- in_channels=[256, 512, 1024],
113
- num_base_priors=3
114
- ),
115
- prior_generator=dict(
116
- base_sizes=anchors,
117
- strides=[
118
- 8,
119
- 16,
120
- 32,
121
- ],
122
- ),
123
- loss_obj=dict(
124
- loss_weight=_base_.loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)
125
- ),
126
- loss_cls=dict(
127
- loss_weight=_base_.loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)
128
- ),
129
- loss_bbox=dict(
130
- loss_weight=_base_.loss_bbox_weight * (3 / num_det_layers),
131
- ),
132
- obj_level_weights=[
133
- 4.0,
134
- 1.0,
135
- 0.4,
136
- ],
137
- ),
138
- test_cfg=dict(
139
- nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
140
- multi_label=False,
141
- ),
142
-
143
- )
144
-
145
-
146
- train_dataloader = dict(
147
- batch_size=train_batch_size_per_gpu,
148
- num_workers=train_num_workers,
149
- dataset=dict(
150
- _delete_=True,
151
- type='RepeatDataset',
152
- times=1,
153
- dataset=dict(
154
- type='YOLOv5CocoDataset',
155
- data_root=data_root_train,
156
- ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
157
- data_prefix=dict(img='images/'),
158
- metainfo=metainfo,
159
- filter_cfg=dict(filter_empty_gt=False),
160
- pipeline=train_pipeline
161
- )
162
- )
163
- )
164
-
165
- val_dataloader = dict(
166
- batch_size=validation_batch_size_per_gpu,
167
- num_workers=train_num_workers,
168
- dataset=dict(
169
- data_root=data_root_val,
170
- metainfo=metainfo,
171
- ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
172
- data_prefix=dict(img='images/'),
173
- pipeline=_base_.test_pipeline
174
- )
175
- )
176
-
177
- test_dataloader = dict(
178
- batch_size=test_batch_size_per_gpu,
179
- num_workers=train_num_workers,
180
- dataset=dict(
181
- data_root=data_root_test,
182
- metainfo=metainfo,
183
- ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
184
- data_prefix=dict(img='images/'),
185
- batch_shapes_cfg=batch_shapes_cfg,
186
- pipeline=_base_.test_pipeline
187
- )
188
- )
189
-
190
- _base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
191
- _base_.optim_wrapper.optimizer.lr = base_lr
192
-
193
- val_evaluator = dict(
194
- ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
195
- )
196
- test_evaluator = dict(
197
- ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
198
- )
199
-
200
-
201
- default_hooks = dict(
202
- checkpoint=dict(
203
- interval=1,
204
- max_keep_ckpts=1,
205
- save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
206
- ),
207
- # The warmup_mim_iter parameter is critical.
208
- # The default value is 1000 which is not suitable for cat datasets.
209
- param_scheduler=dict(
210
- max_epochs=max_epochs,
211
- warmup_mim_iter=1000,
212
- lr_factor=lr_factor
213
- ),
214
- logger=dict(type='LoggerHook', interval=5))
215
-
216
- train_cfg = dict(max_epochs=max_epochs, val_interval=1)
217
- visualizer = dict(vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')])
218
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
YOLOv8/configs/yolov8/yolov8.py DELETED
@@ -1,240 +0,0 @@
1
- _base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'
2
-
3
- # ========================modified parameters======================
4
- deepen_factor = 0.67
5
- widen_factor = 0.75
6
- last_stage_out_channels = 768
7
-
8
- affine_scale = 0.9
9
- mixup_prob = 0.1
10
-
11
-
12
- img_scale = (128, 128) #_base_.img_scale
13
- # img_scale = (640, 640) #_base_.img_scale
14
- num_classes = 1
15
- class_name = ('small',)
16
- num_classes = len(class_name)
17
- metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
18
-
19
- train_batch_size_per_gpu = 512
20
- val_batch_size_per_gpu = 128
21
- test_batch_size_per_gpu = 128
22
-
23
- train_num_workers = 16
24
- val_num_workers = 16
25
- test_num_workers = 16
26
-
27
- # -----train val related-----
28
- # Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
29
- base_lr = 0.001
30
- lr_factor = 0.01 # Learning rate scaling factor
31
- max_epochs = 1000 # Maximum training epochs
32
-
33
- # Disable mosaic augmentation for final 10 epochs (stage 2)
34
- close_mosaic_epochs = 10
35
-
36
- save_epoch_intervals = 1
37
- max_keep_ckpts = 2
38
-
39
- # validation intervals in stage 2
40
- val_interval_stage2 = 1
41
-
42
- # TRAIN DATASET
43
- data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
44
- ann_file_train = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
45
-
46
- # VAL DATASET
47
- data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
48
- ann_file_val = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
49
-
50
- # TEST DATASET
51
- ## LINZ
52
- data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
53
- ann_file_test = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
54
-
55
-
56
- load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth'
57
-
58
-
59
- # =======================Unmodified in most cases==================
60
- pre_transform = _base_.pre_transform
61
- last_transform = _base_.last_transform
62
-
63
- model = dict(
64
- backbone=dict(
65
- last_stage_out_channels=last_stage_out_channels,
66
- deepen_factor=deepen_factor,
67
- widen_factor=widen_factor
68
- ),
69
- neck=dict(
70
- deepen_factor=deepen_factor,
71
- widen_factor=widen_factor,
72
- in_channels=[256, 512, last_stage_out_channels],
73
- out_channels=[256, 512, last_stage_out_channels]
74
- ),
75
- bbox_head=dict(
76
- head_module=dict(
77
- num_classes=num_classes,
78
- widen_factor=widen_factor,
79
- in_channels=[256, 512, last_stage_out_channels])
80
- ),
81
- train_cfg=dict(
82
- assigner=dict(
83
- num_classes=num_classes
84
- )
85
- )
86
- )
87
-
88
- mosaic_affine_transform = [
89
- dict(
90
- type='Mosaic',
91
- img_scale=img_scale,
92
- pad_val=114.0,
93
- pre_transform=pre_transform),
94
- dict(
95
- type='YOLOv5RandomAffine',
96
- max_rotate_degree=0.0,
97
- max_shear_degree=0.0,
98
- max_aspect_ratio=100,
99
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
100
- # img_scale is (width, height)
101
- border=(-img_scale[0] // 2, -img_scale[1] // 2),
102
- border_val=(114, 114, 114))
103
- ]
104
-
105
- # enable mixup
106
- train_pipeline = [
107
- *pre_transform, *mosaic_affine_transform,
108
- dict(
109
- type='YOLOv5MixUp',
110
- prob=mixup_prob,
111
- pre_transform=[*pre_transform, *mosaic_affine_transform]),
112
- *last_transform
113
- ]
114
-
115
- train_pipeline_stage2 = [
116
- *pre_transform,
117
- dict(type='YOLOv5KeepRatioResize', scale=img_scale),
118
- dict(
119
- type='LetterResize',
120
- scale=img_scale,
121
- allow_scale_up=True,
122
- pad_val=dict(img=114.0)
123
- ),
124
- dict(
125
- type='YOLOv5RandomAffine',
126
- max_rotate_degree=0.0,
127
- max_shear_degree=0.0,
128
- scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
129
- max_aspect_ratio=100,
130
- border_val=(114, 114, 114)
131
- ),
132
- *last_transform
133
- ]
134
-
135
- train_dataloader = dict(
136
- batch_size=train_batch_size_per_gpu,
137
- num_workers=train_num_workers,
138
- dataset=dict(
139
- data_root=data_root_train,
140
- ann_file=data_root_train+ann_file_train,
141
- data_prefix=dict(img='images/'),
142
- filter_cfg=dict(filter_empty_gt=False),
143
- metainfo=metainfo,
144
- pipeline=train_pipeline
145
- )
146
- )
147
-
148
- # _base_.test_pipeline[1].img_scale = img_scale
149
- # _base_.test_pipeline[2].scale = img_scale
150
-
151
- test_pipeline = [
152
- dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
153
- dict(type='YOLOv5KeepRatioResize', scale=img_scale),
154
- dict(
155
- type='LetterResize',
156
- scale=img_scale,
157
- allow_scale_up=False,
158
- pad_val=dict(img=114)),
159
- dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
160
- dict(
161
- type='mmdet.PackDetInputs',
162
- meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
163
- 'scale_factor', 'pad_param'))
164
- ]
165
-
166
- val_dataloader = dict(
167
- batch_size=val_batch_size_per_gpu,
168
- num_workers=val_num_workers,
169
- dataset=dict(
170
- data_root=data_root_val,
171
- ann_file=data_root_val+ann_file_val,
172
- data_prefix=dict(img='images/'),
173
- metainfo=metainfo,
174
- # filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
175
- filter_cfg=dict(filter_empty_gt=True), # Does this make a change?
176
- pipeline=test_pipeline,
177
- )
178
- )
179
-
180
- test_dataloader = dict(
181
- batch_size=test_batch_size_per_gpu,
182
- num_workers=test_num_workers,
183
- dataset=dict(
184
- data_root=data_root_test,
185
- ann_file=data_root_test+ann_file_test,
186
- data_prefix=dict(img='images/'),
187
- metainfo=metainfo,
188
- filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
189
- pipeline=test_pipeline,
190
- )
191
- )
192
-
193
-
194
- optim_wrapper = dict(
195
- optimizer=dict(
196
- lr=base_lr,
197
- batch_size_per_gpu=train_batch_size_per_gpu
198
- ),
199
- )
200
-
201
-
202
- default_hooks = dict(
203
- param_scheduler=dict(
204
- lr_factor=lr_factor,
205
- max_epochs=max_epochs
206
- ),
207
- checkpoint=dict(
208
- interval=save_epoch_intervals,
209
- max_keep_ckpts=max_keep_ckpts,
210
- save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
211
- )
212
- )
213
-
214
- _base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs
215
- _base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
216
-
217
- val_evaluator = dict(
218
- ann_file=data_root_val + ann_file_val,
219
- )
220
-
221
- test_evaluator = dict(
222
- ann_file= data_root_test + ann_file_test,
223
- )
224
-
225
- train_cfg = dict(
226
- max_epochs=max_epochs,
227
- val_interval=save_epoch_intervals,
228
- dynamic_intervals=[
229
- ((max_epochs - close_mosaic_epochs),
230
- val_interval_stage2)
231
- ]
232
- )
233
-
234
-
235
- visualizer = dict(
236
- vis_backends=[
237
- dict(type='LocalVisBackend'),
238
- dict(type='TensorboardVisBackend')
239
- ]
240
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FasterRCNN/best_coco_bbox_mAP_50_epoch_7.pth → faster-rcnn.pth RENAMED
File without changes
ViTDet/best_coco_bbox_mAP_50_iter_16000.pth → vitdet.pth RENAMED
File without changes
YOLOv5/best_coco_bbox_mAP_50_epoch_429.pth → yolov5.pth RENAMED
File without changes
YOLOv8/best_coco_bbox_mAP_50_epoch_32.pth → yolov8.pth RENAMED
File without changes