xiaofanghf commited on
Commit
66ab6a3
·
verified ·
1 Parent(s): 0966166

Upload folder using huggingface_hub

Browse files
FasterRCNN/best_coco_bbox_mAP_50_epoch_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa172df9b6e3ddb3c258aabba90cb970cc44fe3037bdf4f81e361fa0a45ba1fc
3
- size 165917385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ea5ae3361c0b0bc75235ce9ae7a4629ad280ee86aa56c1ab130dcbbf53bfc9
3
+ size 134
FasterRCNN/configs/faster_rcnn/faster-rcnn.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/faster-rcnn_r50_fpn.py',
3
+ '../_base_/datasets/coco_detection.py',
4
+ '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
5
+ ]
6
+
7
+
8
+
9
+ # TRAIN DATASET
10
+ data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
11
+
12
+ # VAL DATASET
13
+ data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
14
+
15
+ # TEST DATASET
16
+ ## LINZ
17
+ data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
18
+
19
+
20
+ max_epochs = 1000 # 40
21
+ train_batch_size_per_gpu = 64
22
+ validation_batch_size_per_gpu = 64
23
+ test_batch_size_per_gpu = 64
24
+ num_workers = 8
25
+
26
+
27
+ class_name = ('small',)
28
+ num_classes = len(class_name)
29
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
30
+
31
+ img_scale = (128, 128)
32
+
33
+ affine_scale = 0.9
34
+
35
+ load_from = 'https://download.openxlab.org.cn/models/mmdetection/FasterR-CNN/weight/faster-rcnn_r50_fpn_2x_coco'
36
+
37
+
38
+ # model settings
39
+ model = dict(
40
+ type='FasterRCNN',
41
+ data_preprocessor=dict(
42
+ type='DetDataPreprocessor',
43
+ mean=[123.675, 116.28, 103.53],
44
+ std=[58.395, 57.12, 57.375],
45
+ bgr_to_rgb=True,
46
+ pad_size_divisor=32),
47
+ backbone=dict(
48
+ type='ResNet',
49
+ depth=50,
50
+ num_stages=4,
51
+ out_indices=(0, 1, 2, 3),
52
+ frozen_stages=1,
53
+ norm_cfg=dict(type='BN', requires_grad=True),
54
+ norm_eval=True,
55
+ style='pytorch',
56
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
57
+ neck=dict(
58
+ type='FPN',
59
+ in_channels=[256, 512, 1024, 2048],
60
+ out_channels=256,
61
+ num_outs=5),
62
+ rpn_head=dict(
63
+ type='RPNHead',
64
+ in_channels=256,
65
+ feat_channels=256,
66
+ anchor_generator=dict(
67
+ type='AnchorGenerator',
68
+ scales=[8],
69
+ ratios=[0.5, 1.0, 2.0],
70
+ strides=[4, 8, 16, 32, 64]),
71
+ bbox_coder=dict(
72
+ type='DeltaXYWHBBoxCoder',
73
+ target_means=[.0, .0, .0, .0],
74
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
75
+ loss_cls=dict(
76
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
77
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
78
+ roi_head=dict(
79
+ type='StandardRoIHead',
80
+ bbox_roi_extractor=dict(
81
+ type='SingleRoIExtractor',
82
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
83
+ out_channels=256,
84
+ featmap_strides=[4, 8, 16, 32]),
85
+ bbox_head=dict(
86
+ type='Shared2FCBBoxHead',
87
+ in_channels=256,
88
+ fc_out_channels=1024,
89
+ roi_feat_size=7,
90
+ num_classes=num_classes,
91
+ bbox_coder=dict(
92
+ type='DeltaXYWHBBoxCoder',
93
+ target_means=[0., 0., 0., 0.],
94
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
95
+ reg_class_agnostic=False,
96
+ loss_cls=dict(
97
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
98
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
99
+ # model training and testing settings
100
+ train_cfg=dict(
101
+ rpn=dict(
102
+ assigner=dict(
103
+ type='MaxIoUAssigner',
104
+ pos_iou_thr=0.7,
105
+ neg_iou_thr=0.3,
106
+ min_pos_iou=0.3,
107
+ match_low_quality=True,
108
+ ignore_iof_thr=-1),
109
+ sampler=dict(
110
+ type='RandomSampler',
111
+ num=256,
112
+ pos_fraction=0.5,
113
+ neg_pos_ub=-1,
114
+ add_gt_as_proposals=False),
115
+ allowed_border=-1,
116
+ pos_weight=-1,
117
+ debug=False),
118
+ rpn_proposal=dict(
119
+ nms_pre=2000,
120
+ max_per_img=1000,
121
+ nms=dict(type='nms', iou_threshold=0.7),
122
+ min_bbox_size=0),
123
+ rcnn=dict(
124
+ assigner=dict(
125
+ type='MaxIoUAssigner',
126
+ pos_iou_thr=0.5,
127
+ neg_iou_thr=0.5,
128
+ min_pos_iou=0.5,
129
+ match_low_quality=False,
130
+ ignore_iof_thr=-1),
131
+ sampler=dict(
132
+ type='RandomSampler',
133
+ num=512,
134
+ pos_fraction=0.25,
135
+ neg_pos_ub=-1,
136
+ add_gt_as_proposals=True),
137
+ pos_weight=-1,
138
+ debug=False)),
139
+ test_cfg=dict(
140
+ rpn=dict(
141
+ nms_pre=1000,
142
+ max_per_img=1000,
143
+ nms=dict(type='nms', iou_threshold=0.7),
144
+ min_bbox_size=0),
145
+ rcnn=dict(
146
+ score_thr=0.05,
147
+ nms=dict(type='nms', iou_threshold=0.5),
148
+ max_per_img=100)
149
+ # soft-nms is also supported for rcnn testing
150
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
151
+ ))
152
+
153
+
154
+
155
+ dataset_type = 'CocoDataset'
156
+
157
+ backend_args = None
158
+
159
+ # Original
160
+ # train_pipeline = [
161
+ # dict(type='LoadImageFromFile', backend_args=backend_args),
162
+ # dict(type='LoadAnnotations', with_bbox=True),
163
+ # dict(type='Resize', scale=img_scale, keep_ratio=True),
164
+ # dict(type='RandomFlip', prob=0.5),
165
+ # dict(type='PackDetInputs')
166
+ # ]
167
+
168
+ pre_transform = [
169
+ dict(type='LoadImageFromFile', backend_args=backend_args),
170
+ dict(type='LoadAnnotations', with_bbox=True)
171
+ ]
172
+
173
+ albu_train_transforms = [
174
+ dict(type='Blur', p=0.01),
175
+ dict(type='MedianBlur', p=0.01),
176
+ dict(type='ToGray', p=0.01),
177
+ dict(type='CLAHE', p=0.01)
178
+ ]
179
+
180
+ last_transform = [
181
+ dict(
182
+ type='Albu',
183
+ transforms=albu_train_transforms,
184
+ bbox_params=dict(
185
+ type='BboxParams',
186
+ format='pascal_voc',
187
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
188
+ keymap={
189
+ 'img': 'image',
190
+ 'gt_bboxes': 'bboxes'
191
+ }),
192
+ dict(type='YOLOXHSVRandomAug'), # ???
193
+ dict(type='RandomFlip', prob=0.5),
194
+ dict(
195
+ type='PackDetInputs',
196
+ meta_keys=(
197
+ 'img_id',
198
+ 'img_path',
199
+ 'ori_shape',
200
+ 'img_shape',
201
+ 'flip',
202
+ 'flip_direction'
203
+ )
204
+ )
205
+ ]
206
+
207
+ mosaic_affine_transform = [
208
+ dict(
209
+ type='Mosaic',
210
+ img_scale=img_scale,
211
+ pad_val=114.0,
212
+ ),
213
+ dict(
214
+ type='RandomAffine',
215
+ max_rotate_degree=0.0,
216
+ max_shear_degree=0.0,
217
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
218
+ # img_scale is (width, height)
219
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
220
+ border_val=(114, 114, 114))
221
+ ]
222
+
223
+ train_pipeline = [
224
+ *pre_transform,
225
+ *mosaic_affine_transform,
226
+ dict(
227
+ type='MixUp',
228
+ img_scale=img_scale,
229
+ ),
230
+ *last_transform
231
+ ]
232
+
233
+
234
+ # Original
235
+ # train_dataloader = dict(
236
+ # batch_size=train_batch_size_per_gpu,
237
+ # num_workers=num_workers,
238
+ # persistent_workers=True,
239
+ # sampler=dict(type='DefaultSampler', shuffle=True),
240
+ # batch_sampler=dict(type='AspectRatioBatchSampler'),
241
+ # dataset=dict(
242
+ # type=dataset_type,
243
+ # data_root=data_root_train,
244
+ # ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500_BalancedRatio:0.2000.json',
245
+ # data_prefix=dict(img='images/'),
246
+ # filter_cfg=dict(filter_empty_gt=False, min_size=32),
247
+ # pipeline=train_pipeline,
248
+ # metainfo=metainfo,
249
+ # backend_args=backend_args
250
+ # )
251
+ # )
252
+
253
+ train_dataloader = dict(
254
+ batch_size=train_batch_size_per_gpu,
255
+ num_workers=num_workers,
256
+ persistent_workers=True,
257
+ sampler=dict(type='DefaultSampler', shuffle=True),
258
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
259
+ dataset=dict(
260
+ _delete_=True,
261
+ type='MultiImageMixDataset',
262
+ dataset=dict(
263
+ type=dataset_type,
264
+ data_root=data_root_train,
265
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
266
+ data_prefix=dict(img='images/'),
267
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
268
+ metainfo=metainfo,
269
+ backend_args=backend_args,
270
+ pipeline=pre_transform
271
+ ),
272
+ pipeline=train_pipeline,
273
+ )
274
+ )
275
+
276
+
277
+
278
+ test_pipeline = [
279
+ dict(type='LoadImageFromFile', backend_args=backend_args),
280
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
281
+ # If you don't have a gt annotation, delete the pipeline
282
+ dict(type='LoadAnnotations', with_bbox=True),
283
+ dict(
284
+ type='PackDetInputs',
285
+ meta_keys=(
286
+ 'img_id', 'img_path', 'ori_shape', 'img_shape',
287
+ 'scale_factor'
288
+ )
289
+ )
290
+ ]
291
+
292
+
293
+ val_dataloader = dict(
294
+ batch_size=validation_batch_size_per_gpu,
295
+ num_workers=num_workers,
296
+ persistent_workers=True,
297
+ drop_last=False,
298
+ sampler=dict(type='DefaultSampler', shuffle=False),
299
+ dataset=dict(
300
+ type=dataset_type,
301
+ data_root=data_root_val,
302
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
303
+ data_prefix=dict(img='images/'),
304
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
305
+ test_mode=True,
306
+ pipeline=test_pipeline,
307
+ metainfo=metainfo,
308
+ backend_args=backend_args
309
+ )
310
+ )
311
+
312
+ test_dataloader = dict(
313
+ batch_size=test_batch_size_per_gpu,
314
+ num_workers=num_workers,
315
+ persistent_workers=True,
316
+ drop_last=False,
317
+ sampler=dict(type='DefaultSampler', shuffle=False),
318
+ dataset=dict(
319
+ type=dataset_type,
320
+ data_root=data_root_test,
321
+ ann_file='annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
322
+ data_prefix=dict(img='images/'),
323
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
324
+ test_mode=True,
325
+ pipeline=test_pipeline,
326
+ metainfo=metainfo,
327
+ backend_args=backend_args
328
+ )
329
+ )
330
+
331
+ # test_dataloader = val_dataloader
332
+
333
+ val_evaluator = dict(
334
+ type='CocoMetric',
335
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
336
+ metric='bbox',
337
+ format_only=False,
338
+ backend_args=backend_args
339
+ )
340
+
341
+ test_evaluator = dict(
342
+ type='CocoMetric',
343
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
344
+ metric='bbox',
345
+ format_only=False,
346
+ backend_args=backend_args
347
+ )
348
+
349
+ # test_evaluator = val_evaluator
350
+
351
+
352
+
353
+ # training schedule for 2x
354
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
355
+ val_cfg = dict(type='ValLoop')
356
+ test_cfg = dict(type='TestLoop')
357
+
358
+ # learning rate
359
+ param_scheduler = [
360
+ dict(
361
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
362
+ dict(
363
+ type='MultiStepLR',
364
+ begin=0,
365
+ end=max_epochs,
366
+ by_epoch=True,
367
+ milestones=[16, 22],
368
+ gamma=0.1)
369
+ ]
370
+
371
+ # optimizer
372
+ optim_wrapper = dict(
373
+ type='OptimWrapper',
374
+ optimizer=dict(
375
+ type='SGD',
376
+ lr=0.2,
377
+ momentum=0.9,
378
+ weight_decay=0.0001
379
+ )
380
+ )
381
+
382
+ # Default setting for scaling LR automatically
383
+ # - `enable` means enable scaling LR automatically
384
+ # or not by default.
385
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU)
386
+ auto_scale_lr = dict(enable=False, base_batch_size=train_batch_size_per_gpu)
387
+
388
+
389
+ default_hooks = dict(
390
+ checkpoint=dict(
391
+ interval=1,
392
+ max_keep_ckpts=1,
393
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
394
+ ),
395
+ # The warmup_mim_iter parameter is critical.
396
+ # The default value is 1000 which is not suitable for cat datasets.
397
+ # param_scheduler=dict(
398
+ # max_epochs=max_epochs,
399
+ # warmup_mim_iter=1000,
400
+ # lr_factor=lr_factor
401
+ # ),
402
+ logger=dict(type='LoggerHook', interval=5))
403
+
404
+ vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]
405
+ visualizer = dict(
406
+ type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
407
+
README.md CHANGED
@@ -3,8 +3,16 @@ license: cc-by-nc-4.0
3
  language:
4
  - en
5
  pipeline_tag: object-detection
 
6
  ---
 
 
 
 
 
7
 
8
  ## References
9
 
10
- * [Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision](https://arxiv.org/abs/2507.20976)
 
 
 
3
  language:
4
  - en
5
  pipeline_tag: object-detection
6
+ library_name: mmdetection
7
  ---
8
+ ## Introduction
9
+ We introduce a real-world aerial-view dataset, LINZ, captured in Selwyn (New Zealand). The dataset has a ground sampling distance (GSD) of 12.5 cm per px and has been sampled to a 112 px × 112 px image size. For data annotation, we label only the small vehicle centers. To leverage the abundance of bounding box-based open-source object detection frameworks, we define a fixed-size ground truth bounding box of 42.36 px × 42.36 px centered at each vehicle. Annotations are provided in COCO format [x, y, w, h], where "small" in the annotation json files denotes the small vehicle class and (x, y) denotes the top-left corner of the bounding box. We use AP50 as the evaluation metric.
10
+
11
+ ## Model Usage
12
+ This folder contains four detectors trained on Real LINZ data and tested on Real LINZ data, along with the configuration files we used for training and testing.
13
 
14
  ## References
15
 
16
+ ➡️ **Paper:** [Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision](https://arxiv.org/abs/2507.20976)
17
+ ➡️ **Project Page:** [Webpage](https://humansensinglab.github.io/AGenDA/)
18
+ ➡️ **Data:** [AGenDA](https://github.com/humansensinglab/AGenDA/tree/main/Data)
ViTDet/best_coco_bbox_mAP_50_iter_16000.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d60857c5f22007ce2a8711bcf3d27a77818888fbea48ff8b529dd6f5ee7b397
3
- size 434474871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:657c55b1a0b3177d5a754720a26d7a8433ef3005a58fe482dbd78857efd4204b
3
+ size 134
ViTDet/projects/ViTDet/configs/vitdet.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../../../configs/_base_/default_runtime.py',
3
+ '../../../configs/_base_/models/mask-rcnn_r50_fpn.py',
4
+ ]
5
+
6
+ custom_imports = dict(imports=['projects.ViTDet.vitdet'])
7
+
8
+
9
+ ## TRAIN DATASET
10
+ data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
11
+
12
+
13
+ ## VALIDATION DATASET
14
+ data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
15
+
16
+ # TEST DATASET
17
+ ## LINZ
18
+ data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
19
+
20
+
21
+ train_batch_size_per_gpu = 24
22
+ val_batch_size_per_gpu = 12
23
+ test_batch_size_per_gpu = 60
24
+
25
+ num_workers = 8
26
+
27
+ max_epochs = 100
28
+
29
+
30
+ # img_scale = (1024, 1024)
31
+ # img_scale = (384, 384)
32
+ img_scale = (128, 128)
33
+
34
+ affine_scale = 0.9
35
+
36
+ class_name = ('small',)
37
+ num_classes = len(class_name)
38
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
39
+
40
+
41
+
42
+ load_from = 'https://download.openmmlab.com/mmdetection/v3.0/vitdet/vitdet_mask-rcnn_vit-b-mae_lsj-100e/vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
43
+
44
+
45
+ # MODEL SETTINGS
46
+ backbone_norm_cfg = dict(type='LN', requires_grad=True)
47
+ norm_cfg = dict(type='LN2d', requires_grad=True)
48
+
49
+ batch_augments = [
50
+ dict(type='BatchFixedSizePad', size=img_scale, pad_mask=True)
51
+ ]
52
+
53
+ model = dict(
54
+ data_preprocessor=dict(pad_size_divisor=32, batch_augments=batch_augments),
55
+ backbone=dict(
56
+ _delete_=True,
57
+ type='ViT',
58
+ # img_size=1024,
59
+ # img_size=384,
60
+ img_size=img_scale[0],
61
+ patch_size=16,
62
+ embed_dim=768,
63
+ depth=12,
64
+ num_heads=12,
65
+ drop_path_rate=0.1,
66
+ window_size=14,
67
+ mlp_ratio=4,
68
+ qkv_bias=True,
69
+ norm_cfg=backbone_norm_cfg,
70
+ window_block_indexes=[
71
+ 0,
72
+ 1,
73
+ 3,
74
+ 4,
75
+ 6,
76
+ 7,
77
+ 9,
78
+ 10,
79
+ ],
80
+ use_rel_pos=True,
81
+ init_cfg=dict(
82
+ type='Pretrained',
83
+ # checkpoint='mae_pretrain_vit_base.pth'
84
+ # checkpoint='detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth'
85
+ checkpoint='vitdet_mask-rcnn_vit-b-mae_lsj-100e_20230328_153519-e15fe294.pth'
86
+ )
87
+ ),
88
+ neck=dict(
89
+ _delete_=True,
90
+ type='SimpleFPN',
91
+ backbone_channel=768,
92
+ in_channels=[192, 384, 768, 768],
93
+ out_channels=256,
94
+ num_outs=5,
95
+ norm_cfg=norm_cfg),
96
+ rpn_head=dict(num_convs=2),
97
+ roi_head=dict(
98
+ bbox_head=dict(
99
+ type='Shared4Conv1FCBBoxHead',
100
+ conv_out_channels=256,
101
+ norm_cfg=norm_cfg,
102
+ num_classes=num_classes
103
+ ),
104
+ # mask_head=dict( # No masks as used
105
+ # norm_cfg=norm_cfg,
106
+ # num_classes=1,
107
+ # loss_mask=dict(
108
+ # use_mask=False
109
+ # ),
110
+ # )
111
+ mask_head=None
112
+ )
113
+ )
114
+
115
+ custom_hooks = [dict(type='Fp16CompresssionHook')]
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+ ##
125
+ dataset_type = 'CocoDataset'
126
+ backend_args = None
127
+
128
+ # Original
129
+ # train_pipeline = [
130
+ # dict(type='LoadImageFromFile', backend_args=backend_args),
131
+ # dict(
132
+ # type='LoadAnnotations',
133
+ # with_bbox=True,
134
+ # # with_mask=True
135
+ # with_mask=False
136
+ # ),
137
+ # dict(type='RandomFlip', prob=0.5),
138
+ # dict(
139
+ # type='RandomResize',
140
+ # scale=img_scale,
141
+ # ratio_range=(0.1, 2.0),
142
+ # keep_ratio=True),
143
+ # dict(
144
+ # type='RandomCrop',
145
+ # crop_type='absolute_range',
146
+ # crop_size=img_scale,
147
+ # recompute_bbox=True,
148
+ # allow_negative_crop=True),
149
+ # dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
150
+ # dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
151
+ # dict(type='PackDetInputs')
152
+ # ]
153
+
154
+ pre_transform = [
155
+ dict(type='LoadImageFromFile', backend_args=backend_args),
156
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
157
+ ]
158
+
159
+ albu_train_transforms = [
160
+ dict(type='Blur', p=0.01),
161
+ dict(type='MedianBlur', p=0.01),
162
+ dict(type='ToGray', p=0.01),
163
+ dict(type='CLAHE', p=0.01)
164
+ ]
165
+
166
+ last_transform = [
167
+ dict(
168
+ type='Albu',
169
+ transforms=albu_train_transforms,
170
+ bbox_params=dict(
171
+ type='BboxParams',
172
+ format='pascal_voc',
173
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
174
+ keymap={
175
+ 'img': 'image',
176
+ 'gt_bboxes': 'bboxes'
177
+ }),
178
+ dict(type='YOLOXHSVRandomAug'), # ???
179
+ dict(type='RandomFlip', prob=0.5),
180
+ dict(
181
+ type='PackDetInputs',
182
+ meta_keys=(
183
+ 'img_id',
184
+ 'img_path',
185
+ 'ori_shape',
186
+ 'img_shape',
187
+ 'flip',
188
+ 'flip_direction'
189
+ )
190
+ )
191
+ ]
192
+
193
+ mosaic_affine_transform = [
194
+ dict(
195
+ type='Mosaic',
196
+ img_scale=img_scale,
197
+ pad_val=114.0,
198
+ ),
199
+ dict(
200
+ type='RandomAffine',
201
+ max_rotate_degree=0.0,
202
+ max_shear_degree=0.0,
203
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
204
+ # img_scale is (width, height)
205
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
206
+ border_val=(114, 114, 114))
207
+ ]
208
+
209
+ train_pipeline = [
210
+ *pre_transform,
211
+ *mosaic_affine_transform,
212
+ dict(
213
+ type='MixUp',
214
+ img_scale=img_scale,
215
+ ),
216
+ *last_transform
217
+ ]
218
+
219
+
220
+
221
+ # Original
222
+ # train_dataloader = dict(
223
+ # batch_size=train_batch_size_per_gpu,
224
+ # num_workers=num_workers,
225
+ # persistent_workers=True,
226
+ # sampler=dict(type='DefaultSampler', shuffle=True),
227
+ # dataset=dict(
228
+ # type=dataset_type,
229
+ # data_root=data_root_train,
230
+ # ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
231
+ # data_prefix=dict(img='images/'),
232
+ # # filter_cfg=dict(filter_empty_gt=True, min_size=32),
233
+ # filter_cfg=dict(filter_empty_gt=False),
234
+ # pipeline=train_pipeline,
235
+ # metainfo=metainfo,
236
+ # )
237
+ # )
238
+
239
+ train_dataloader = dict(
240
+ batch_size=train_batch_size_per_gpu,
241
+ num_workers=num_workers,
242
+ persistent_workers=True,
243
+ sampler=dict(type='DefaultSampler', shuffle=True),
244
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
245
+ dataset=dict(
246
+ # _delete_=True,
247
+ type='MultiImageMixDataset',
248
+ dataset=dict(
249
+ type=dataset_type,
250
+ data_root=data_root_train,
251
+ ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
252
+ data_prefix=dict(img='images/'),
253
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
254
+ metainfo=metainfo,
255
+ backend_args=backend_args,
256
+ pipeline=pre_transform
257
+ ),
258
+ pipeline=train_pipeline,
259
+ )
260
+ )
261
+
262
+
263
+ test_pipeline = [
264
+ dict(type='LoadImageFromFile', backend_args=backend_args),
265
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
266
+ dict(type='Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
267
+ dict(
268
+ type='LoadAnnotations',
269
+ with_bbox=True,
270
+ # with_mask=True
271
+ with_mask=False
272
+ ),
273
+ dict(
274
+ type='PackDetInputs',
275
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
276
+ 'scale_factor'))
277
+ ]
278
+
279
+ val_dataloader = dict(
280
+ batch_size=val_batch_size_per_gpu,
281
+ num_workers=num_workers,
282
+ persistent_workers=True,
283
+ drop_last=False,
284
+ sampler=dict(type='DefaultSampler', shuffle=False),
285
+ dataset=dict(
286
+ type=dataset_type,
287
+ data_root=data_root_val,
288
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
289
+ data_prefix=dict(img='images/'),
290
+ test_mode=True,
291
+ pipeline=test_pipeline,
292
+ metainfo=metainfo,
293
+ )
294
+ )
295
+ # test_dataloader = val_dataloader
296
+ test_dataloader = dict(
297
+ batch_size=test_batch_size_per_gpu,
298
+ num_workers=num_workers,
299
+ persistent_workers=True,
300
+ drop_last=False,
301
+ sampler=dict(type='DefaultSampler', shuffle=False),
302
+ dataset=dict(
303
+ type=dataset_type,
304
+ data_root=data_root_test,
305
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
306
+ data_prefix=dict(img='images/'),
307
+ test_mode=True,
308
+ pipeline=test_pipeline,
309
+ metainfo=metainfo,
310
+ )
311
+ )
312
+
313
+ val_evaluator = dict(
314
+ type='CocoMetric',
315
+ ann_file=data_root_val + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
316
+ metric='bbox',
317
+ format_only=False)
318
+ # test_evaluator = val_evaluator
319
+ test_evaluator = dict(
320
+ type='CocoMetric',
321
+ ann_file=data_root_test + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
322
+ metric='bbox',
323
+ format_only=False
324
+ )
325
+
326
+ optim_wrapper = dict(
327
+ type='AmpOptimWrapper',
328
+ constructor='LayerDecayOptimizerConstructor',
329
+ paramwise_cfg={
330
+ 'decay_rate': 0.7,
331
+ 'decay_type': 'layer_wise',
332
+ 'num_layers': 12,
333
+ },
334
+ optimizer=dict(
335
+ type='AdamW',
336
+ # lr=0.0001,
337
+ # lr=0.01,
338
+ lr=0.001,
339
+ betas=(0.9, 0.999),
340
+ weight_decay=0.1,
341
+ ))
342
+
343
+ # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
344
+ # max_iters = 184375
345
+ # interval = 5000
346
+ max_iters = 100000
347
+
348
+ # interval = 2000
349
+ interval = 1000
350
+
351
+ dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
352
+ param_scheduler = [
353
+ dict(
354
+ type='LinearLR',
355
+ start_factor=0.001,
356
+ by_epoch=False,
357
+ begin=0,
358
+ end=250
359
+ ),
360
+ dict(
361
+ type='MultiStepLR',
362
+ begin=0,
363
+
364
+ end=max_iters,
365
+ # end=max_epochs,
366
+
367
+ by_epoch=False,
368
+ # by_epoch=True,
369
+
370
+ # 88 ep = [163889 iters * 64 images/iter / 118000 images/ep
371
+ # 96 ep = [177546 iters * 64 images/iter / 118000 images/ep
372
+ # milestones=[20, 29],
373
+ # milestones=[5000, 6000],
374
+ milestones=[1000, 2000],
375
+ gamma=0.1
376
+ )
377
+ ]
378
+
379
+ train_cfg = dict(
380
+ type='IterBasedTrainLoop',
381
+ max_iters=max_iters,
382
+ val_interval=interval,
383
+ dynamic_intervals=dynamic_intervals
384
+ )
385
+ # train_cfg = dict(
386
+ # type='EpochBasedTrainLoop',
387
+ # max_epochs=max_epochs,
388
+ # val_interval=1
389
+ # )
390
+
391
+ val_cfg = dict(type='ValLoop')
392
+ test_cfg = dict(type='TestLoop')
393
+
394
+ default_hooks = dict(
395
+ logger=dict(
396
+ type='LoggerHook',
397
+ interval=50,
398
+ log_metric_by_epoch=False
399
+ ),
400
+ checkpoint=dict(
401
+ type='CheckpointHook',
402
+ by_epoch=False,
403
+ # by_epoch=True,
404
+ save_last=True,
405
+ # interval=1,
406
+ interval=interval,
407
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50'],
408
+ max_keep_ckpts=2
409
+ )
410
+ )
411
+
412
+ vis_backends = [
413
+ dict(type='LocalVisBackend'),
414
+ dict(type='TensorboardVisBackend')
415
+ ]
416
+
417
+ visualizer = dict(
418
+ type='DetLocalVisualizer',
419
+ vis_backends=vis_backends,
420
+ name='visualizer'
421
+ )
422
+
423
+ log_processor = dict(
424
+ type='LogProcessor',
425
+ window_size=50,
426
+ by_epoch=False
427
+ # by_epoch=True
428
+ )
429
+
430
+ auto_scale_lr = dict(base_batch_size=64)
431
+
432
+
YOLOv5/best_coco_bbox_mAP_50_epoch_429.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89a69bbeb369be3ee42ae83b32ad691c22a9cae5173abb999fc46080b73b19e0
3
- size 173121983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2425e8520bd355a827a65fe5f3b0cd42f52796078e8acb3175f97d2861fdeedf
3
+ size 134
YOLOv5/configs/yolov5/yolov5.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py'
2
+ deepen_factor = 0.67
3
+ widen_factor = 0.75
4
+
5
+ # TRAIN DATASET
6
+ data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
7
+
8
+ # VAL DATASET
9
+ data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
10
+
11
+ # TEST DATASET
12
+ ## LINZ
13
+ data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
14
+
15
+
16
+ class_name = ('small',)
17
+ num_classes = len(class_name)
18
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
19
+
20
+ img_scale = (128, 128)
21
+ # img_scale = (112, 112)
22
+
23
+ # Estimated with " python ./tools/analysis_tools/optimize_anchors.py --input-shape 128 128 --augment-args 0.1 1.9 --algorithm v5-k-means configs/..."
24
+ # anchors = [[(25, 32), (53, 69), (159, 220)], [(235, 166), (242, 242), (310, 337)], [(365, 375), (230, 681), (679, 324)]]
25
+ # anchors = [[(157, 155), (239, 133), (136, 238)], [(240, 165), (170, 237), (236, 191)], [(206, 240), (241, 217), (242, 242)]]
26
+ anchors = [[(31, 28), (32, 37), (27, 48)], [(48, 27), (47, 34), (34, 48)], [(41, 48), (49, 41), (48, 48)]]
27
+
28
+ max_epochs = 1000 # 40
29
+ train_batch_size_per_gpu = 200
30
+ validation_batch_size_per_gpu = 100
31
+ test_batch_size_per_gpu = 200 #768 #384
32
+ train_num_workers = 8
33
+
34
+ num_det_layers = 3
35
+
36
+ # Learning rate
37
+ base_lr = 0.01 #0.01
38
+ lr_factor = 0.1
39
+
40
+ load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth'
41
+
42
+ batch_shapes_cfg = dict(
43
+ img_size=img_scale[0],
44
+ batch_size=train_batch_size_per_gpu
45
+ )
46
+
47
+ pre_transform = _base_.pre_transform
48
+ affine_scale = _base_.affine_scale
49
+ mosaic_affine_pipeline = [
50
+ dict(
51
+ type='Mosaic',
52
+ img_scale=img_scale,
53
+ pad_val=114.0,
54
+ pre_transform=pre_transform),
55
+ dict(
56
+ type='YOLOv5RandomAffine',
57
+ max_rotate_degree=0.0,
58
+ max_shear_degree=0.0,
59
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
60
+ # img_scale is (width, height)
61
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
62
+ border_val=(114, 114, 114))
63
+ ]
64
+
65
+ train_pipeline = [
66
+ *pre_transform,
67
+ *mosaic_affine_pipeline,
68
+ dict(
69
+ type='YOLOv5MixUp',
70
+ prob=_base_.mixup_prob,
71
+ pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
72
+ dict(
73
+ type='mmdet.Albu',
74
+ transforms=_base_.albu_train_transforms,
75
+ bbox_params=dict(
76
+ type='BboxParams',
77
+ format='pascal_voc',
78
+ label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
79
+ keymap={
80
+ 'img': 'image',
81
+ 'gt_bboxes': 'bboxes'
82
+ }),
83
+ dict(type='YOLOv5HSVRandomAug'),
84
+ dict(type='mmdet.RandomFlip', prob=0.5),
85
+ dict(
86
+ type='mmdet.PackDetInputs',
87
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
88
+ 'flip_direction'))
89
+ ]
90
+
91
+ _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='YOLOv5KeepRatioResize')].scale = img_scale
92
+ _base_.test_pipeline[next(i for i, v in enumerate(_base_.test_pipeline) if v.type=='LetterResize')].scale = img_scale
93
+
94
+
95
+ model = dict(
96
+ backbone=dict(
97
+ deepen_factor=deepen_factor,
98
+ widen_factor=widen_factor,
99
+ ),
100
+ neck=dict(
101
+ deepen_factor=deepen_factor,
102
+ widen_factor=widen_factor,
103
+ in_channels=[256, 512, 1024],
104
+ out_channels=[256, 512, 1024],
105
+ num_csp_blocks=3,
106
+ ),
107
+ bbox_head=dict(
108
+ head_module=dict(
109
+ widen_factor=widen_factor,
110
+ num_classes=num_classes,
111
+ featmap_strides=[8, 16, 32],
112
+ in_channels=[256, 512, 1024],
113
+ num_base_priors=3
114
+ ),
115
+ prior_generator=dict(
116
+ base_sizes=anchors,
117
+ strides=[
118
+ 8,
119
+ 16,
120
+ 32,
121
+ ],
122
+ ),
123
+ loss_obj=dict(
124
+ loss_weight=_base_.loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)
125
+ ),
126
+ loss_cls=dict(
127
+ loss_weight=_base_.loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)
128
+ ),
129
+ loss_bbox=dict(
130
+ loss_weight=_base_.loss_bbox_weight * (3 / num_det_layers),
131
+ ),
132
+ obj_level_weights=[
133
+ 4.0,
134
+ 1.0,
135
+ 0.4,
136
+ ],
137
+ ),
138
+ test_cfg=dict(
139
+ nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
140
+ multi_label=False,
141
+ ),
142
+
143
+ )
144
+
145
+
146
+ train_dataloader = dict(
147
+ batch_size=train_batch_size_per_gpu,
148
+ num_workers=train_num_workers,
149
+ dataset=dict(
150
+ _delete_=True,
151
+ type='RepeatDataset',
152
+ times=1,
153
+ dataset=dict(
154
+ type='YOLOv5CocoDataset',
155
+ data_root=data_root_train,
156
+ ann_file=data_root_train + 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
157
+ data_prefix=dict(img='images/'),
158
+ metainfo=metainfo,
159
+ filter_cfg=dict(filter_empty_gt=False),
160
+ pipeline=train_pipeline
161
+ )
162
+ )
163
+ )
164
+
165
+ val_dataloader = dict(
166
+ batch_size=validation_batch_size_per_gpu,
167
+ num_workers=train_num_workers,
168
+ dataset=dict(
169
+ data_root=data_root_val,
170
+ metainfo=metainfo,
171
+ ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
172
+ data_prefix=dict(img='images/'),
173
+ pipeline=_base_.test_pipeline
174
+ )
175
+ )
176
+
177
+ test_dataloader = dict(
178
+ batch_size=test_batch_size_per_gpu,
179
+ num_workers=train_num_workers,
180
+ dataset=dict(
181
+ data_root=data_root_test,
182
+ metainfo=metainfo,
183
+ ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
184
+ data_prefix=dict(img='images/'),
185
+ batch_shapes_cfg=batch_shapes_cfg,
186
+ pipeline=_base_.test_pipeline
187
+ )
188
+ )
189
+
190
+ _base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
191
+ _base_.optim_wrapper.optimizer.lr = base_lr
192
+
193
+ val_evaluator = dict(
194
+ ann_file=data_root_val+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
195
+ )
196
+ test_evaluator = dict(
197
+ ann_file=data_root_test+'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json',
198
+ )
199
+
200
+
201
+ default_hooks = dict(
202
+ checkpoint=dict(
203
+ interval=1,
204
+ max_keep_ckpts=1,
205
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
206
+ ),
207
+ # The warmup_mim_iter parameter is critical.
208
+ # The default value is 1000 which is not suitable for cat datasets.
209
+ param_scheduler=dict(
210
+ max_epochs=max_epochs,
211
+ warmup_mim_iter=1000,
212
+ lr_factor=lr_factor
213
+ ),
214
+ logger=dict(type='LoggerHook', interval=5))
215
+
216
+ train_cfg = dict(max_epochs=max_epochs, val_interval=1)
217
+ visualizer = dict(vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')])
218
+
YOLOv8/best_coco_bbox_mAP_50_epoch_32.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5f825c0b53ef12637277dc17b6b7ea9a3375bba2c1a4d7d700e83229edae25d
3
- size 209487703
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a20a476ff5a5b9ff6cfe690877a080b38981fe7b5d22132c261e293bc8c324d
3
+ size 134
YOLOv8/configs/yolov8/yolov8.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'
2
+
3
+ # ========================modified parameters======================
4
+ deepen_factor = 0.67
5
+ widen_factor = 0.75
6
+ last_stage_out_channels = 768
7
+
8
+ affine_scale = 0.9
9
+ mixup_prob = 0.1
10
+
11
+
12
+ img_scale = (128, 128) #_base_.img_scale
13
+ # img_scale = (640, 640) #_base_.img_scale
14
+ num_classes = 1
15
+ class_name = ('small',)
16
+ num_classes = len(class_name)
17
+ metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
18
+
19
+ train_batch_size_per_gpu = 512
20
+ val_batch_size_per_gpu = 128
21
+ test_batch_size_per_gpu = 128
22
+
23
+ train_num_workers = 16
24
+ val_num_workers = 16
25
+ test_num_workers = 16
26
+
27
+ # -----train val related-----
28
+ # Base learning rate for optim_wrapper. The upstream value assumes a total batch size of 8 GPUs x 16 images = 64; presumably rescaled for the larger batch size used here — verify.
29
+ base_lr = 0.001
30
+ lr_factor = 0.01 # Learning rate scaling factor
31
+ max_epochs = 1000 # Maximum training epochs
32
+
33
+ # Disable mosaic augmentation for final 10 epochs (stage 2)
34
+ close_mosaic_epochs = 10
35
+
36
+ save_epoch_intervals = 1
37
+ max_keep_ckpts = 2
38
+
39
+ # Validation interval (in epochs) during stage 2, after mosaic augmentation is disabled.
40
+ val_interval_stage2 = 1
41
+
42
+ # TRAIN DATASET
43
+ data_root_train = 'YOUR_PATH_TO_REAL_LINZ_TRAIN'
44
+ ann_file_train = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
45
+
46
+ # VAL DATASET
47
+ data_root_val = 'YOUR_PATH_TO_REAL_LINZ_VAL'
48
+ ann_file_val = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
49
+
50
+ # TEST DATASET
51
+ ## LINZ
52
+ data_root_test = 'YOUR_PATH_TO_REAL_LINZ_TEST'
53
+ ann_file_test = 'annotations_coco_FakeBBoxes:42.36px_ForIoU:0.500.json'
54
+
55
+
56
+ load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth'
57
+
58
+
59
+ # =======================Unmodified in most cases==================
60
+ pre_transform = _base_.pre_transform
61
+ last_transform = _base_.last_transform
62
+
63
+ model = dict(
64
+ backbone=dict(
65
+ last_stage_out_channels=last_stage_out_channels,
66
+ deepen_factor=deepen_factor,
67
+ widen_factor=widen_factor
68
+ ),
69
+ neck=dict(
70
+ deepen_factor=deepen_factor,
71
+ widen_factor=widen_factor,
72
+ in_channels=[256, 512, last_stage_out_channels],
73
+ out_channels=[256, 512, last_stage_out_channels]
74
+ ),
75
+ bbox_head=dict(
76
+ head_module=dict(
77
+ num_classes=num_classes,
78
+ widen_factor=widen_factor,
79
+ in_channels=[256, 512, last_stage_out_channels])
80
+ ),
81
+ train_cfg=dict(
82
+ assigner=dict(
83
+ num_classes=num_classes
84
+ )
85
+ )
86
+ )
87
+
88
+ mosaic_affine_transform = [
89
+ dict(
90
+ type='Mosaic',
91
+ img_scale=img_scale,
92
+ pad_val=114.0,
93
+ pre_transform=pre_transform),
94
+ dict(
95
+ type='YOLOv5RandomAffine',
96
+ max_rotate_degree=0.0,
97
+ max_shear_degree=0.0,
98
+ max_aspect_ratio=100,
99
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
100
+ # img_scale is (width, height)
101
+ border=(-img_scale[0] // 2, -img_scale[1] // 2),
102
+ border_val=(114, 114, 114))
103
+ ]
104
+
105
+ # enable mixup
106
+ train_pipeline = [
107
+ *pre_transform, *mosaic_affine_transform,
108
+ dict(
109
+ type='YOLOv5MixUp',
110
+ prob=mixup_prob,
111
+ pre_transform=[*pre_transform, *mosaic_affine_transform]),
112
+ *last_transform
113
+ ]
114
+
115
+ train_pipeline_stage2 = [
116
+ *pre_transform,
117
+ dict(type='YOLOv5KeepRatioResize', scale=img_scale),
118
+ dict(
119
+ type='LetterResize',
120
+ scale=img_scale,
121
+ allow_scale_up=True,
122
+ pad_val=dict(img=114.0)
123
+ ),
124
+ dict(
125
+ type='YOLOv5RandomAffine',
126
+ max_rotate_degree=0.0,
127
+ max_shear_degree=0.0,
128
+ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
129
+ max_aspect_ratio=100,
130
+ border_val=(114, 114, 114)
131
+ ),
132
+ *last_transform
133
+ ]
134
+
135
+ train_dataloader = dict(
136
+ batch_size=train_batch_size_per_gpu,
137
+ num_workers=train_num_workers,
138
+ dataset=dict(
139
+ data_root=data_root_train,
140
+ ann_file=data_root_train+ann_file_train,
141
+ data_prefix=dict(img='images/'),
142
+ filter_cfg=dict(filter_empty_gt=False),
143
+ metainfo=metainfo,
144
+ pipeline=train_pipeline
145
+ )
146
+ )
147
+
148
+ # _base_.test_pipeline[1].img_scale = img_scale
149
+ # _base_.test_pipeline[2].scale = img_scale
150
+
151
+ test_pipeline = [
152
+ dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
153
+ dict(type='YOLOv5KeepRatioResize', scale=img_scale),
154
+ dict(
155
+ type='LetterResize',
156
+ scale=img_scale,
157
+ allow_scale_up=False,
158
+ pad_val=dict(img=114)),
159
+ dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
160
+ dict(
161
+ type='mmdet.PackDetInputs',
162
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
163
+ 'scale_factor', 'pad_param'))
164
+ ]
165
+
166
+ val_dataloader = dict(
167
+ batch_size=val_batch_size_per_gpu,
168
+ num_workers=val_num_workers,
169
+ dataset=dict(
170
+ data_root=data_root_val,
171
+ ann_file=data_root_val+ann_file_val,
172
+ data_prefix=dict(img='images/'),
173
+ metainfo=metainfo,
174
+ # filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
175
+ filter_cfg=dict(filter_empty_gt=True), # Does this make a change?
176
+ pipeline=test_pipeline,
177
+ )
178
+ )
179
+
180
+ test_dataloader = dict(
181
+ batch_size=test_batch_size_per_gpu,
182
+ num_workers=test_num_workers,
183
+ dataset=dict(
184
+ data_root=data_root_test,
185
+ ann_file=data_root_test+ann_file_test,
186
+ data_prefix=dict(img='images/'),
187
+ metainfo=metainfo,
188
+ filter_cfg=dict(filter_empty_gt=False), # Does this make a change?
189
+ pipeline=test_pipeline,
190
+ )
191
+ )
192
+
193
+
194
+ optim_wrapper = dict(
195
+ optimizer=dict(
196
+ lr=base_lr,
197
+ batch_size_per_gpu=train_batch_size_per_gpu
198
+ ),
199
+ )
200
+
201
+
202
+ default_hooks = dict(
203
+ param_scheduler=dict(
204
+ lr_factor=lr_factor,
205
+ max_epochs=max_epochs
206
+ ),
207
+ checkpoint=dict(
208
+ interval=save_epoch_intervals,
209
+ max_keep_ckpts=max_keep_ckpts,
210
+ save_best=['coco/bbox_mAP', 'coco/bbox_mAP_50']
211
+ )
212
+ )
213
+
214
+ _base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs
215
+ _base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
216
+
217
+ val_evaluator = dict(
218
+ ann_file=data_root_val + ann_file_val,
219
+ )
220
+
221
+ test_evaluator = dict(
222
+ ann_file= data_root_test + ann_file_test,
223
+ )
224
+
225
+ train_cfg = dict(
226
+ max_epochs=max_epochs,
227
+ val_interval=save_epoch_intervals,
228
+ dynamic_intervals=[
229
+ ((max_epochs - close_mosaic_epochs),
230
+ val_interval_stage2)
231
+ ]
232
+ )
233
+
234
+
235
+ visualizer = dict(
236
+ vis_backends=[
237
+ dict(type='LocalVisBackend'),
238
+ dict(type='TensorboardVisBackend')
239
+ ]
240
+ )