maelic committed on
Commit
2c90c3b
·
verified ·
1 Parent(s): 1ec45b3

Upload yolov8m/config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. yolov8m/config.yml +342 -0
yolov8m/config.yml ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 42
2
+ metric_to_track: mR
3
+ dtype: float32
4
+ output_dir: ./checkpoints/IndoorVG/react++_yolov8m
5
+ glove_dir: ./datasets/
6
+ verbose: INFO
7
+ paths_catalog: ''
8
+ paths_data: ''
9
+ input:
10
+ img_size:
11
+ - 640
12
+ - 640
13
+ pixel_mean:
14
+ - 102.9801
15
+ - 115.9465
16
+ - 122.7717
17
+ pixel_std:
18
+ - 1.0
19
+ - 1.0
20
+ - 1.0
21
+ to_bgr255: true
22
+ flip_prob_train: 0.5
23
+ padding: true
24
+ brightness: 0.15
25
+ contrast: 0.15
26
+ saturation: 0.1
27
+ hue: 0.0
28
+ vertical_flip_prob_train: 0.0
29
+ datasets:
30
+ train: []
31
+ val: []
32
+ test: []
33
+ name: IndoorVG
34
+ type: coco
35
+ data_dir: datasets/IndoorVG/IndoorVG_coco_format
36
+ classes: []
37
+ catalog: {}
38
+ dataloader:
39
+ num_workers: 8
40
+ size_divisibility: 32
41
+ aspect_ratio_grouping: true
42
+ model:
43
+ flip_aug: false
44
+ rpn_only: false
45
+ mask_on: false
46
+ attribute_on: false
47
+ relation_on: true
48
+ device: cuda
49
+ meta_architecture: GeneralizedYOLO
50
+ cls_agnostic_bbox_reg: false
51
+ weight: ''
52
+ pretrained_detector_ckpt: ./checkpoints/BACKBONES/yolov8m_indoorvg.pt
53
+ text_embedding: glove.6B
54
+ box_head: false
55
+ backbone:
56
+ type: yolo
57
+ extra_config: ''
58
+ freeze_conv_body_at: 2
59
+ nms_thresh: 0.001
60
+ freeze: true
61
+ freeze_at: 10
62
+ fpn:
63
+ use_gn: false
64
+ use_relu: false
65
+ group_norm:
66
+ dim_per_gp: -1
67
+ num_groups: 32
68
+ epsilon: 1.0e-05
69
+ yolo:
70
+ weights: ''
71
+ size: yolov8m
72
+ img_size: 640
73
+ out_channels:
74
+ - 192
75
+ - 384
76
+ - 576
77
+ rpn:
78
+ use_fpn: false
79
+ rpn_mid_channel: 512
80
+ anchor_sizes:
81
+ - 32
82
+ - 64
83
+ - 128
84
+ - 256
85
+ - 512
86
+ anchor_stride:
87
+ - 16
88
+ aspect_ratios:
89
+ - 0.5
90
+ - 1.0
91
+ - 2.0
92
+ straddle_thresh: 0
93
+ fg_iou_threshold: 0.7
94
+ bg_iou_threshold: 0.3
95
+ batch_size_per_image: 256
96
+ positive_fraction: 0.5
97
+ pre_nms_top_n_train: 12000
98
+ pre_nms_top_n_test: 6000
99
+ post_nms_top_n_train: 2000
100
+ post_nms_top_n_test: 1000
101
+ min_size: 0
102
+ fpn_post_nms_top_n_train: 2000
103
+ fpn_post_nms_top_n_test: 2000
104
+ fpn_post_nms_per_batch: true
105
+ rpn_head: SingleConvRPNHead
106
+ roi_heads:
107
+ fg_iou_threshold: 0.5
108
+ bg_iou_threshold: 0.3
109
+ bbox_reg_weights:
110
+ - 10.0
111
+ - 10.0
112
+ - 5.0
113
+ - 5.0
114
+ batch_size_per_image: 256
115
+ positive_fraction: 0.25
116
+ score_thresh: 0.01
117
+ nms: 0.5
118
+ post_nms_per_cls_topn: 300
119
+ nms_filter_duplicates: false
120
+ detections_per_img: 100
121
+ roi_box_head:
122
+ feature_extractor: DAMPBoxFeatureExtractor
123
+ predictor: FastRCNNPredictor
124
+ pooler_resolution: 14
125
+ pooler_sampling_ratio: 0
126
+ pooler_scales:
127
+ - 0.0625
128
+ mlp_head_dim: 256
129
+ use_gn: false
130
+ dilation: 1
131
+ conv_head_dim: 256
132
+ num_stacked_convs: 4
133
+ num_classes: 85
134
+ patch_size: 32
135
+ feat_idx_multiscale: true
136
+ feat_idx_neighbors: 1
137
+ roi_attribute_head:
138
+ feature_extractor: FPN2MLPFeatureExtractor
139
+ predictor: FPNPredictor
140
+ share_box_feature_extractor: true
141
+ use_binary_loss: true
142
+ attribute_loss_weight: 0.1
143
+ num_attributes: 201
144
+ max_attributes: 10
145
+ attribute_bgfg_sample: true
146
+ attribute_bgfg_ratio: 3
147
+ pos_weight: 5.0
148
+ roi_mask_head:
149
+ feature_extractor: ResNet50Conv5ROIFeatureExtractor
150
+ predictor: MaskRCNNC4Predictor
151
+ pooler_resolution: 14
152
+ pooler_sampling_ratio: 0
153
+ pooler_scales:
154
+ - 0.0625
155
+ mlp_head_dim: 1024
156
+ conv_layers:
157
+ - 256
158
+ - 256
159
+ - 256
160
+ - 256
161
+ resolution: 14
162
+ share_box_feature_extractor: true
163
+ postprocess_masks: false
164
+ postprocess_masks_threshold: 0.5
165
+ dilation: 1
166
+ use_gn: false
167
+ roi_relation_head:
168
+ predictor: REACTPlusPlusPredictor
169
+ feature_extractor: P5SceneContextExtractor
170
+ use_union_features: true
171
+ use_spatial_features: true
172
+ use_union_features_inference: true
173
+ union_dropout: 0.0
174
+ max_pairs_inference: 0
175
+ textual_features_only: false
176
+ visual_features_only: false
177
+ logit_adjustment: false
178
+ logit_adjustment_tau: 0.3
179
+ pooling_all_levels: true
180
+ batch_size_per_image: 512
181
+ positive_fraction: 0.35
182
+ use_gt_box: false
183
+ use_gt_object_label: false
184
+ embed_dim: 200
185
+ context_dropout_rate: 0.2
186
+ context_hidden_dim: 512
187
+ context_pooling_dim: 4096
188
+ context_obj_layer: 1
189
+ context_rel_layer: 1
190
+ mlp_head_dim: 512
191
+ loss:
192
+ loss_type: BalancedLogitAdjustedLoss
193
+ beta: 0.999
194
+ gamma: 0.0
195
+ alpha: 0.15
196
+ fg_boost: 2.0
197
+ fg_weight: 1.0
198
+ label_smoothing_epsilon: 0.01
199
+ logit_adjustment_tau: 0.5
200
+ bg_discount: 0.3
201
+ ccl_weight: 0.1
202
+ decisive_margin: 2.0
203
+ poly_epsilon: 0.0
204
+ label_smoothing: 0.1
205
+ sampler_aux_loss_weight: 0.1
206
+ attn_entropy_weight: 0.01
207
+ offset_reg_weight: 0.005
208
+ containment_loss_weight: 0.02
209
+ num_classes: 38
210
+ decoder_depth: 1
211
+ transformer_depth: 1
212
+ num_rel_layers: 2
213
+ use_scene_context: true
214
+ use_geo_bias: true
215
+ use_cls_emb: true
216
+ use_geo_enc: true
217
+ max_pairs_per_img: 512
218
+ num_queries: 64
219
+ use_cross_attention: true
220
+ attn_type: standard
221
+ geometric_loss_weight: 0.0
222
+ num_sample_points: 6
223
+ num_sample_heads: 6
224
+ feature_strategy: multi_scale
225
+ use_rmsnorm: true
226
+ use_swiglu: true
227
+ clip_rel_path: ''
228
+ react_loss_weights:
229
+ l21_loss: 1.0
230
+ dist_loss2: 0.1
231
+ loss_dis: 0.5
232
+ transformer:
233
+ dropout_rate: 0.1
234
+ obj_layer: 4
235
+ rel_layer: 2
236
+ num_head: 8
237
+ inner_dim: 2048
238
+ key_dim: 64
239
+ val_dim: 64
240
+ squat_module:
241
+ pre_norm: false
242
+ num_decoder: 3
243
+ rho: 0.35
244
+ beta: 0.7
245
+ pretrain_mask: false
246
+ pretrain_mask_epoch: 1
247
+ causal:
248
+ effect_analysis: false
249
+ fusion_type: sum
250
+ context_layer: motifs
251
+ separate_spatial: false
252
+ effect_type: none
253
+ spatial_for_vision: false
254
+ label_smoothing_loss: false
255
+ use_frequency_bias: false
256
+ require_box_overlap: false
257
+ num_sample_per_gt_rel: 8
258
+ add_gtbox_to_proposal_in_train: false
259
+ classifier: linear
260
+ predict_use_vision: false
261
+ use_bg_discounting: false
262
+ bg_discounting_threshold: 0.1
263
+ resnets:
264
+ num_groups: 1
265
+ width_per_group: 64
266
+ stride_in_1x1: true
267
+ trans_func: BottleneckWithFixedBatchNorm
268
+ stem_func: StemWithFixedBatchNorm
269
+ res5_dilation: 1
270
+ backbone_out_channels: 1024
271
+ res2_out_channels: 256
272
+ stem_out_channels: 64
273
+ solver:
274
+ max_iter: 0
275
+ max_epoch: 20
276
+ base_lr: 0.0001
277
+ bias_lr_factor: 1
278
+ momentum: 0.9
279
+ weight_decay: 0.05
280
+ weight_decay_bias: 0.0
281
+ clip_norm: 5.0
282
+ gamma: 0.5
283
+ steps:
284
+ - 41000
285
+ - 50000
286
+ warmup_factor: 0.1
287
+ warmup_epochs: 2
288
+ warmup_method: linear
289
+ checkpoint_period: 250
290
+ grad_norm_clip: 1.0
291
+ print_grad_freq: 250
292
+ to_val: true
293
+ pre_val: true
294
+ val_period: 250
295
+ update_schedule_during_load: false
296
+ ims_per_batch: 8
297
+ optimizer: ADAMW
298
+ slow_ratio: 10.0
299
+ deform_offset_slow_ratio: 1.0
300
+ muon_scaling: 0.2
301
+ adamw_scaling: 0.8
302
+ schedule:
303
+ type: WarmupCosineAnnealingIterLR
304
+ patience: 2
305
+ threshold: 0.0001
306
+ cooldown: 1
307
+ factor: 0.5
308
+ max_decay_step: 7
309
+ eta_min: 5.0e-07
310
+ plateau_epochs: 5
311
+ accum_steps: 4
312
+ test:
313
+ expected_results: []
314
+ expected_results_sigma_tol: 4
315
+ ims_per_batch: 1
316
+ detections_per_img: 100
317
+ informative: false
318
+ bbox_aug:
319
+ enabled: false
320
+ h_flip: false
321
+ scales: []
322
+ max_size: 4000
323
+ scale_h_flip: false
324
+ save_proposals: false
325
+ relation:
326
+ multiple_preds: false
327
+ iou_threshold: 0.5
328
+ require_overlap: false
329
+ later_nms_prediction_thres: 0.5
330
+ sync_gather: true
331
+ allow_load_from_cache: false
332
+ top_k: 100
333
+ custum_eval: false
334
+ custum_path: ''
335
+ global_setting:
336
+ basic_encoder: Cross-Attention
337
+ gcl_setting:
338
+ group_split_mode: divide4
339
+ knowledge_transfer_mode: KL_logit_TopDown
340
+ no_relation_restrain: false
341
+ zero_label_padding_mode: false
342
+ knowledge_loss_coefficient: 1.0