xiaofanghf commited on
Commit
b589cda
·
verified ·
1 Parent(s): c29d3a8

Upload folder using huggingface_hub

Browse files
FasterRCNN/best_coco_bbox_mAP_50_epoch_32.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e1d095ded2b29ebebb691754a11df8c4ab1492124c6c79aa11f960bfd8bd5d0
3
- size 166339657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6d76dbe4b16b55d8e9efc83286b442c4ffaa8500aebdabe8761a6af60529f6
3
+ size 134
FasterRCNN/configs/faster_rcnn/faster-rcnn.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# MMDetection config: Faster R-CNN (R50-FPN) fine-tuned on the Real UGRC
# small-vehicle dataset (single class, fixed-size 42.36 px GT boxes).
# Reconstructed from a diff rendering into valid, runnable config Python.
_base_ = [
    '../_base_/models/faster-rcnn_r50_fpn.py',
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]

# TRAIN DATASET
data_root_train = 'YOUR_PATH_TO_REAL_UGRC_TRAIN'

# VAL DATASET
data_root_val = 'YOUR_PATH_TO_REAL_UGRC_VAL'

# TEST DATASET
## UGRC
data_root_test = 'YOUR_PATH_TO_REAL_UGRC_TEST'

max_epochs = 2000  # 40
train_batch_size_per_gpu = 128
validation_batch_size_per_gpu = 64
test_batch_size_per_gpu = 64
num_workers = 8

# Single "small" (small-vehicle) class.
class_name = ('small',)
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# UGRC tiles are 112x112; inputs are resized to this scale.
img_scale = (128, 128)

# NOTE(review): URL has no .pth suffix — confirm it resolves to a checkpoint.
load_from = 'https://download.openxlab.org.cn/models/mmdetection/FasterR-CNN/weight/faster-rcnn_r50_fpn_2x_coco'

# model settings
model = dict(
    type='FasterRCNN',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32),
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=num_classes,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
    ))

dataset_type = 'CocoDataset'

backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_train,
        # ann_file is resolved relative to data_root by the dataset class.
        ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        pipeline=train_pipeline,
        metainfo=metainfo,
        backend_args=backend_args
    )
)

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=(
            'img_id', 'img_path', 'ori_shape', 'img_shape',
            'scale_factor'
        )
    )
]

val_dataloader = dict(
    batch_size=validation_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_val,
        ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        test_mode=True,
        pipeline=test_pipeline,
        metainfo=metainfo,
        backend_args=backend_args
    )
)

test_dataloader = dict(
    batch_size=test_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_test,
        ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        test_mode=True,
        pipeline=test_pipeline,
        metainfo=metainfo,
        backend_args=backend_args
    )
)

# NOTE(review): evaluators concatenate data_root_* directly — the placeholder
# paths must end with '/' for the resulting ann_file path to be valid.
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args
)

test_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args
)

# training schedule (overrides the 2x base schedule's epoch count)
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
# NOTE(review): milestones=[16, 22] come from the 2x COCO schedule; with
# max_epochs=2000 the LR is constant after epoch 22 — confirm intended.
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))

# Automatic LR scaling is disabled; base_batch_size mirrors the actual
# per-GPU train batch size used here (not the 8x2 COCO default).
auto_scale_lr = dict(enable=False, base_batch_size=train_batch_size_per_gpu)

default_hooks = dict(
    checkpoint=dict(
        interval=1,
        max_keep_ckpts=1,
        save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
    ),
    logger=dict(type='LoggerHook', interval=5))

vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
README.md CHANGED
@@ -1,3 +1,18 @@
1
  ---
2
  license: cc-by-nc-4.0
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-nc-4.0
3
+ language:
4
+ - en
5
+ pipeline_tag: object-detection
6
+ library_name: mmdetection
7
  ---
8
+ ## Introduction
9
+ We introduce a real-world aerial-view dataset, UGRC, captured in Utah (USA). The dataset has a ground sampling distance (GSD) of 12.5 cm per px and has been sampled to a 112 px × 112 px image size. For data annotation, we label only the small vehicle centers. To leverage the abundance of bounding box-based open-source object detection frameworks, we define a fixed-size ground-truth bounding box of 42.36 px × 42.36 px centered at each vehicle. Annotations are provided in COCO format [x, y, w, h], where "small" in the annotation json files denotes the small vehicle class and (x, y) denotes the top-left corner of the bounding box. We use AP50 as the evaluation metric.
10
+
11
+ ## Model Usage
12
+ This folder contains four detectors trained on Real UGRC data and tested on Real UGRC data, along with configuration files we use for training and testing.
13
+
14
+ ## References
15
+
16
+ ➡️ **Paper:** [Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision](https://arxiv.org/abs/2507.20976)
17
+ ➡️ **Project Page:** [Webpage](https://humansensinglab.github.io/AGenDA/)
18
+ ➡️ **Data:** [AGenDA](https://github.com/humansensinglab/AGenDA/tree/main/Data)
ViTDet/best_coco_bbox_mAP_50_iter_32000.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2195ad45ef9bfe79358144a8df84df9a3d9da4cfb7f84fb94444a9d9195783e4
3
- size 437670711
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be3ec54217d757f68b3705899578ed5d64c1cdf165afff2c5291384822201a3
3
+ size 134
ViTDet/projects/ViTDet/configs/vitdet.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# MMDetection config: ViTDet (ViT-B, MAE-pretrained, Mask R-CNN head with the
# mask branch disabled) fine-tuned on the Real UGRC small-vehicle dataset.
# Reconstructed from a diff rendering into valid, runnable config Python.
_base_ = [
    '../../../configs/_base_/default_runtime.py',
    '../../../configs/_base_/models/mask-rcnn_r50_fpn.py',
]

custom_imports = dict(imports=['projects.ViTDet.vitdet'])

## TRAIN DATASET
data_root_train = 'YOUR_PATH_TO_REAL_UGRC_TRAIN'

## VALIDATION DATASET
data_root_val = 'YOUR_PATH_TO_REAL_UGRC_VAL'

# TEST DATASET
## UGRC
data_root_test = 'YOUR_PATH_TO_REAL_UGRC_TEST'

train_batch_size_per_gpu = 24
val_batch_size_per_gpu = 12
test_batch_size_per_gpu = 60

num_workers = 8

max_epochs = 100

# UGRC tiles are small; 128x128 instead of ViTDet's usual 1024x1024 LSJ size.
image_size = (128, 128)

class_name = ('small',)
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

load_from = 'https://download.openmmlab.com/mmdetection/v3.0/vitdet/vitdet_mask-rcnn_vit-b-mae_lsj-100e/vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'

# MODEL SETTINGS
backbone_norm_cfg = dict(type='LN', requires_grad=True)
norm_cfg = dict(type='LN2d', requires_grad=True)

batch_augments = [
    dict(type='BatchFixedSizePad', size=image_size, pad_mask=True)
]

model = dict(
    data_preprocessor=dict(pad_size_divisor=32, batch_augments=batch_augments),
    backbone=dict(
        _delete_=True,  # replace the R50 backbone from the base config
        type='ViT',
        img_size=image_size[0],
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        drop_path_rate=0.1,
        window_size=14,
        mlp_ratio=4,
        qkv_bias=True,
        norm_cfg=backbone_norm_cfg,
        # Global attention at blocks 2, 5, 8, 11; windowed elsewhere.
        window_block_indexes=[0, 1, 3, 4, 6, 7, 9, 10],
        use_rel_pos=True,
        init_cfg=dict(
            type='Pretrained',
            # Local copy of the ViTDet COCO checkpoint (same file as load_from).
            checkpoint='vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
        )
    ),
    neck=dict(
        _delete_=True,
        type='SimpleFPN',
        backbone_channel=768,
        in_channels=[192, 384, 768, 768],
        out_channels=256,
        num_outs=5,
        norm_cfg=norm_cfg),
    rpn_head=dict(num_convs=2),
    roi_head=dict(
        bbox_head=dict(
            type='Shared4Conv1FCBBoxHead',
            conv_out_channels=256,
            norm_cfg=norm_cfg,
            num_classes=num_classes
        ),
        # Masks are not used for this box-only task.
        mask_head=None
    )
)

# Registered by projects.ViTDet.vitdet (upstream name includes the typo).
custom_hooks = [dict(type='Fp16CompresssionHook')]

dataset_type = 'CocoDataset'
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False
    ),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomResize',
        scale=image_size,
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
    dict(type='Pad', size=image_size, pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs')
]

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=image_size, keep_ratio=True),
    dict(type='Pad', size=image_size, pad_val=dict(img=(114, 114, 114))),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False
    ),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

# NOTE(review): ann_file here is data_root + filename, so the dataset will
# join data_root with this absolute-looking path; placeholders must end
# with '/' and the dataset must tolerate the double prefix — verify.
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_train,
        ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False),
        pipeline=train_pipeline,
        metainfo=metainfo,
    )
)

val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_val,
        ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=test_pipeline,
        metainfo=metainfo,
    )
)

test_dataloader = dict(
    batch_size=test_batch_size_per_gpu,
    num_workers=num_workers,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root_test,
        ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=test_pipeline,
        metainfo=metainfo,
    )
)

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
    metric='bbox',
    format_only=False)

test_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
    metric='bbox',
    format_only=False
)

# AMP + layer-wise LR decay, as in the upstream ViTDet recipe.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    constructor='LayerDecayOptimizerConstructor',
    paramwise_cfg={
        'decay_rate': 0.7,
        'decay_type': 'layer_wise',
        'num_layers': 12,
    },
    optimizer=dict(
        type='AdamW',
        lr=0.0001,
        betas=(0.9, 0.999),
        weight_decay=0.1,
    ))

max_iters = 100000
interval = 1000

# Validate every `interval` iters, plus once right after the last full interval.
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]

# NOTE(review): with by_epoch=False, milestones=[20, 29] are ITERATIONS, so
# the LR decays within the warmup window — these look like leftover epoch
# values from the epoch-based variant; confirm intended.
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=250),
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_iters,
        by_epoch=False,
        milestones=[20, 29],
        gamma=0.1)
]

train_cfg = dict(
    type='IterBasedTrainLoop',
    max_iters=max_iters,
    val_interval=interval,
    dynamic_intervals=dynamic_intervals
)

val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

default_hooks = dict(
    logger=dict(
        type='LoggerHook',
        interval=50
    ),
    checkpoint=dict(
        type='CheckpointHook',
        by_epoch=False,
        save_last=True,
        interval=interval,
        save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50'],
        max_keep_ckpts=5
    ),
)

vis_backends = [
    dict(type='LocalVisBackend'),
    dict(type='TensorboardVisBackend')
]

visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer'
)

log_processor = dict(
    type='LogProcessor',
    window_size=50,
    by_epoch=False
)

auto_scale_lr = dict(base_batch_size=64)
YOLOv5/best_coco_bbox_mAP_epoch_995.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a56589baac4376706e119aa06c5c5cebedf1157f976a3298d7ef62cbbaef2d9
3
- size 178163455
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d6089e1be61cc3b869b1106543a0ef2012385a6e2ddf8a1978896bac89b3ae
3
+ size 134
YOLOv5/configs/yolov5/yolov5.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# MMYOLO config: YOLOv5-m fine-tuned on the Real UGRC small-vehicle dataset.
# Reconstructed from a diff rendering into valid config Python. Note this
# file only evaluates under mmengine config parsing (it dereferences _base_).
_base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py'
deepen_factor = 0.67
widen_factor = 0.75

# TRAIN DATASET
data_root_train = 'YOUR_PATH_TO_REAL_UGRC_TRAIN'

# VAL DATASET
data_root_val = 'YOUR_PATH_TO_REAL_UGRC_VAL'

# TEST DATASET
## UGRC
data_root_test = 'YOUR_PATH_TO_REAL_UGRC_TEST'

class_name = ('small',)
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

img_scale = (128, 128)

# Estimated with " python ./tools/analysis_tools/optimize_anchors.py --input-shape 128 128 --augment-args 0.1 1.9 --algorithm v5-k-means configs/..."
anchors = [[(31, 28), (32, 37), (27, 48)], [(48, 27), (47, 34), (34, 48)], [(41, 48), (49, 41), (48, 48)]]

max_epochs = 1000  # 40
train_batch_size_per_gpu = 200
validation_batch_size_per_gpu = 100
test_batch_size_per_gpu = 200  # 768 #384
train_num_workers = 8

num_det_layers = 3

# Learning rate
base_lr = 0.01  # 0.01
lr_factor = 0.1

load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth'

# NOTE(review): batch_size here is the TRAIN batch size but this cfg is only
# used by the test dataloader — confirm intended.
batch_shapes_cfg = dict(
    img_size=img_scale[0],
    batch_size=train_batch_size_per_gpu
)

pre_transform = _base_.pre_transform
affine_scale = _base_.affine_scale

# Mosaic followed by random affine, shared by the main and MixUp branches.
mosaic_affine_pipeline = [
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114))
]

train_pipeline = [
    *pre_transform,
    *mosaic_affine_pipeline,
    dict(
        type='YOLOv5MixUp',
        prob=_base_.mixup_prob,
        pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
    dict(
        type='mmdet.Albu',
        transforms=_base_.albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
        keymap={
            'img': 'image',
            'gt_bboxes': 'bboxes'
        }),
    dict(type='YOLOv5HSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]

# Patch the base test pipeline's resize transforms to the UGRC input scale.
_base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type == 'YOLOv5KeepRatioResize')].scale = img_scale
_base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type == 'LetterResize')].scale = img_scale

model = dict(
    backbone=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
    ),
    neck=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=[256, 512, 1024],
        out_channels=[256, 512, 1024],
        num_csp_blocks=3,
    ),
    bbox_head=dict(
        head_module=dict(
            widen_factor=widen_factor,
            num_classes=num_classes,
            featmap_strides=[8, 16, 32],
            in_channels=[256, 512, 1024],
            num_base_priors=3
        ),
        prior_generator=dict(
            base_sizes=anchors,
            strides=[8, 16, 32],
        ),
        # Loss weights rescaled for input size / class count per YOLOv5 rules.
        loss_obj=dict(
            loss_weight=_base_.loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)
        ),
        loss_cls=dict(
            loss_weight=_base_.loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)
        ),
        loss_bbox=dict(
            loss_weight=_base_.loss_bbox_weight * (3 / num_det_layers),
        ),
        obj_level_weights=[4.0, 1.0, 0.4],
    ),
    test_cfg=dict(
        nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
        multi_label=False,
    ),
)

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        _delete_=True,
        type='RepeatDataset',
        times=1,
        dataset=dict(
            type='YOLOv5CocoDataset',
            data_root=data_root_train,
            ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
            data_prefix=dict(img='images/'),
            metainfo=metainfo,
            filter_cfg=dict(filter_empty_gt=False),
            pipeline=train_pipeline
        )
    )
)

val_dataloader = dict(
    batch_size=validation_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root_val,
        metainfo=metainfo,
        ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        pipeline=_base_.test_pipeline
    )
)

test_dataloader = dict(
    batch_size=test_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root_test,
        metainfo=metainfo,
        ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
        data_prefix=dict(img='images/'),
        batch_shapes_cfg=batch_shapes_cfg,
        pipeline=_base_.test_pipeline
    )
)

_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
_base_.optim_wrapper.optimizer.lr = base_lr

val_evaluator = dict(
    ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
)
test_evaluator = dict(
    ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
)

default_hooks = dict(
    checkpoint=dict(
        interval=1,
        max_keep_ckpts=1,
        save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
    ),
    # The warmup_mim_iter parameter is critical.
    # The default value is 1000 which is not suitable for cat datasets.
    param_scheduler=dict(
        max_epochs=max_epochs,
        warmup_mim_iter=1000,
        lr_factor=lr_factor
    ),
    logger=dict(type='LoggerHook', interval=5))

train_cfg = dict(max_epochs=max_epochs, val_interval=1)
visualizer = dict(vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')])
YOLOv8/best_coco_bbox_mAP_50_epoch_31.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e32fee489f2f8622735759e9c3a9ae4054b23fb1b82be111f043f72ec7ca4
3
- size 210319319
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2f7d44fc3061e7f9e61ab952085a6778941f0bf3aa1cefa041580abeed83f5
3
+ size 134
YOLOv8/configs/yolov8/yolov8.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'

# ========================modified parameters======================
# YOLOv8-m scaling factors applied on top of the YOLOv8-s base config.
deepen_factor = 0.67
widen_factor = 0.75
last_stage_out_channels = 768

affine_scale = 0.9
mixup_prob = 0.1

# Network input size, (width, height).
img_scale = (128, 128)  # _base_.img_scale
# img_scale = (640, 640)  # _base_.img_scale

# Single-class detection setup.
# FIX: removed the dead `num_classes = 1` assignment that was
# immediately overwritten by `len(class_name)` below.
class_name = ('small',)
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

train_batch_size_per_gpu = 512
val_batch_size_per_gpu = 128
test_batch_size_per_gpu = 128

train_num_workers = 16
val_num_workers = 16
test_num_workers = 16

# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
base_lr = 0.001
lr_factor = 0.01  # Learning rate scaling factor
max_epochs = 1000  # Maximum training epochs

# Disable mosaic augmentation for final 10 epochs (stage 2)
close_mosaic_epochs = 10

save_epoch_intervals = 1
max_keep_ckpts = 2

# validation intervals in stage 2
val_interval_stage2 = 1

# TRAIN DATASET
data_root_train = 'YOUR_PATH_TO_REAL_UGRC_TRAIN'
ann_file_train = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'

# VAL DATASET
data_root_val = 'YOUR_PATH_TO_REAL_UGRC_VAL'
ann_file_val = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'

# TEST DATASET
## UGRC
data_root_test = 'YOUR_PATH_TO_REAL_UGRC_TEST'
ann_file_test = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'


# COCO-pretrained YOLOv8-m weights used as initialization.
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth'
57
+
58
+
59
+ # =======================Unmodified in most cases==================
60
# =======================Unmodified in most cases==================
# Reuse the base config's augmentation building blocks.
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform

# Channel widths of the three FPN levels fed into neck and head.
_fpn_channels = [256, 512, last_stage_out_channels]

# Scale the YOLOv8-s base model up to the -m variant and shrink the
# head/assigner to the single-class problem.
model = dict(
    backbone=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        last_stage_out_channels=last_stage_out_channels,
    ),
    neck=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=list(_fpn_channels),
        out_channels=list(_fpn_channels),
    ),
    bbox_head=dict(
        head_module=dict(
            num_classes=num_classes,
            widen_factor=widen_factor,
            in_channels=list(_fpn_channels),
        )
    ),
    train_cfg=dict(assigner=dict(num_classes=num_classes)),
)
87
+
88
# Mosaic followed by a random affine; used both as the main stage-1
# augmentation and as the inner pipeline of MixUp below.
_mosaic = dict(
    type='Mosaic',
    img_scale=img_scale,
    pad_val=114.0,
    pre_transform=pre_transform)
_random_affine = dict(
    type='YOLOv5RandomAffine',
    max_rotate_degree=0.0,
    max_shear_degree=0.0,
    max_aspect_ratio=100,
    scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
    # img_scale is (width, height)
    border=(-img_scale[0] // 2, -img_scale[1] // 2),
    border_val=(114, 114, 114))
mosaic_affine_transform = [_mosaic, _random_affine]

# Stage-1 training pipeline with MixUp enabled.
train_pipeline = [
    *pre_transform,
    *mosaic_affine_transform,
    dict(
        type='YOLOv5MixUp',
        prob=mixup_prob,
        pre_transform=[*pre_transform, *mosaic_affine_transform]),
    *last_transform
]
114
+
115
# Stage-2 pipeline (used once mosaic/mixup are switched off for the
# final epochs): keep-ratio resize + letterbox padding, then a random
# affine without the mosaic border offset.
train_pipeline_stage2 = [
    *pre_transform,
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=True,
        pad_val=dict(img=114.0)),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        border_val=(114, 114, 114)),
    *last_transform
]
134
+
135
# Training dataloader over the real UGRC train split.
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root_train,
        # NOTE(review): assumes data_root_train ends with '/' — confirm,
        # otherwise the concatenated annotation path is wrong.
        ann_file=data_root_train + ann_file_train,
        data_prefix=dict(img='images/'),
        metainfo=metainfo,
        # keep images with no GT boxes (filter_empty_gt=False)
        filter_cfg=dict(filter_empty_gt=False),
        pipeline=train_pipeline))
147
+
148
# (kept from the original, disabled: in-place base-pipeline overrides)
# _base_.test_pipeline[1].img_scale = img_scale
# _base_.test_pipeline[2].scale = img_scale

# Deterministic eval pipeline: keep-ratio resize + letterbox without
# upscaling, then pack with the meta keys LetterResize produces.
_letterbox = dict(
    type='LetterResize',
    scale=img_scale,
    allow_scale_up=False,
    pad_val=dict(img=114))
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    _letterbox,
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]
165
+
166
# Validation dataloader (UGRC val split); empty-GT images filtered out.
val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    dataset=dict(
        data_root=data_root_val,
        ann_file=data_root_val + ann_file_val,
        data_prefix=dict(img='images/'),
        metainfo=metainfo,
        # filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
        filter_cfg=dict(filter_empty_gt=True), # Does this make a change?
        pipeline=test_pipeline,
    )
)

# Test dataloader (UGRC test split); empty-GT images are kept.
test_dataloader = dict(
    batch_size=test_batch_size_per_gpu,
    num_workers=test_num_workers,
    dataset=dict(
        data_root=data_root_test,
        ann_file=data_root_test + ann_file_test,
        data_prefix=dict(img='images/'),
        metainfo=metainfo,
        filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
        pipeline=test_pipeline,
    )
)
192
+
193
+
194
# Override the base optimizer with this run's LR and per-GPU batch
# size (the mmyolo optimizer constructor reads `batch_size_per_gpu`).
optim_wrapper = dict(
    optimizer=dict(
        lr=base_lr,
        batch_size_per_gpu=train_batch_size_per_gpu))
200
+
201
+
202
# Checkpointing + LR-schedule hook overrides.
default_hooks = dict(
    checkpoint=dict(
        interval=save_epoch_intervals,
        max_keep_ckpts=max_keep_ckpts,
        save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']),
    param_scheduler=dict(
        max_epochs=max_epochs,
        lr_factor=lr_factor))

# Switch to the stage-2 pipeline (mosaic/mixup off) for the final
# `close_mosaic_epochs` epochs via the base config's switch hook.
_base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
216
+
217
# Evaluators must read the same annotation files as the dataloaders.
val_evaluator = dict(ann_file=data_root_val + ann_file_val)
test_evaluator = dict(ann_file=data_root_test + ann_file_test)
224
+
225
# Validate every `save_epoch_intervals` epochs; once mosaic is closed
# (stage 2), switch to the stage-2 validation interval.
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=save_epoch_intervals,
    dynamic_intervals=[
        (max_epochs - close_mosaic_epochs, val_interval_stage2),
    ])


# Log locally and to TensorBoard.
visualizer = dict(
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(type='TensorboardVisBackend'),
    ])