maelic committed on
Commit
9b5df6b
·
verified ·
1 Parent(s): 2f19ff3

Upload yolo12m/config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. yolo12m/config.yml +337 -0
yolo12m/config.yml ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 42
2
+ metric_to_track: mR
3
+ dtype: float16
4
+ output_dir: ./checkpoints/VG/react++_yolo12m
5
+ glove_dir: datasets
6
+ verbose: INFO
7
+ paths_catalog: ''
8
+ paths_data: ''
9
+ input:
10
+ img_size:
11
+ - 640
12
+ - 640
13
+ pixel_mean:
14
+ - 102.9801
15
+ - 115.9465
16
+ - 122.7717
17
+ pixel_std:
18
+ - 1.0
19
+ - 1.0
20
+ - 1.0
21
+ to_bgr255: true
22
+ flip_prob_train: 0.5
23
+ padding: true
24
+ brightness: 0.0
25
+ contrast: 0.0
26
+ saturation: 0.0
27
+ hue: 0.0
28
+ vertical_flip_prob_train: 0.0
29
+ datasets:
30
+ name: "VG150"
31
+ type: "coco"
32
+ data_dir: "datasets/VG150/VG150_coco_format/"
33
+ dataloader:
34
+ num_workers: 8
35
+ size_divisibility: 32
36
+ aspect_ratio_grouping: true
37
+ model:
38
+ flip_aug: false
39
+ rpn_only: false
40
+ mask_on: false
41
+ attribute_on: false
42
+ relation_on: true
43
+ device: cuda
44
+ meta_architecture: GeneralizedYOLO
45
+ cls_agnostic_bbox_reg: false
46
+ weight: ''
47
+ pretrained_detector_ckpt: /home/maelicneau/Documents/SGG-Benchmark/checkpoints/BACKBONES/VG150_yolo12m/weights/best.pt
48
+ text_embedding: glove.6B
49
+ box_head: false
50
+ backbone:
51
+ type: yolo
52
+ extra_config: ''
53
+ freeze_conv_body_at: 2
54
+ nms_thresh: 0.001
55
+ freeze: true
56
+ freeze_at: 10
57
+ fpn:
58
+ use_gn: false
59
+ use_relu: false
60
+ group_norm:
61
+ dim_per_gp: -1
62
+ num_groups: 32
63
+ epsilon: 1.0e-05
64
+ yolo:
65
+ weights: ''
66
+ size: yolo12m
67
+ img_size: 640
68
+ out_channels:
69
+ - 256
70
+ - 512
71
+ - 512
72
+ rpn:
73
+ use_fpn: false
74
+ rpn_mid_channel: 512
75
+ anchor_sizes:
76
+ - 32
77
+ - 64
78
+ - 128
79
+ - 256
80
+ - 512
81
+ anchor_stride:
82
+ - 16
83
+ aspect_ratios:
84
+ - 0.5
85
+ - 1.0
86
+ - 2.0
87
+ straddle_thresh: 0
88
+ fg_iou_threshold: 0.7
89
+ bg_iou_threshold: 0.3
90
+ batch_size_per_image: 256
91
+ positive_fraction: 0.5
92
+ pre_nms_top_n_train: 12000
93
+ pre_nms_top_n_test: 6000
94
+ post_nms_top_n_train: 2000
95
+ post_nms_top_n_test: 1000
96
+ min_size: 0
97
+ fpn_post_nms_top_n_train: 2000
98
+ fpn_post_nms_top_n_test: 2000
99
+ fpn_post_nms_per_batch: true
100
+ rpn_head: SingleConvRPNHead
101
+ roi_heads:
102
+ fg_iou_threshold: 0.5
103
+ bg_iou_threshold: 0.3
104
+ bbox_reg_weights:
105
+ - 10.0
106
+ - 10.0
107
+ - 5.0
108
+ - 5.0
109
+ batch_size_per_image: 256
110
+ positive_fraction: 0.25
111
+ score_thresh: 0.01
112
+ nms: 0.5
113
+ post_nms_per_cls_topn: 300
114
+ nms_filter_duplicates: false
115
+ detections_per_img: 100
116
+ roi_box_head:
117
+ feature_extractor: DAMPBoxFeatureExtractor
118
+ predictor: FastRCNNPredictor
119
+ pooler_resolution: 14
120
+ pooler_sampling_ratio: 0
121
+ pooler_scales:
122
+ - 0.0625
123
+ mlp_head_dim: 256
124
+ use_gn: false
125
+ dilation: 1
126
+ conv_head_dim: 256
127
+ num_stacked_convs: 4
128
+ num_classes: 151
129
+ patch_size: 32
130
+ feat_idx_multiscale: true
131
+ feat_idx_neighbors: 1
132
+ roi_attribute_head:
133
+ feature_extractor: FPN2MLPFeatureExtractor
134
+ predictor: FPNPredictor
135
+ share_box_feature_extractor: true
136
+ use_binary_loss: true
137
+ attribute_loss_weight: 0.1
138
+ num_attributes: 201
139
+ max_attributes: 10
140
+ attribute_bgfg_sample: true
141
+ attribute_bgfg_ratio: 3
142
+ pos_weight: 5.0
143
+ roi_mask_head:
144
+ feature_extractor: ResNet50Conv5ROIFeatureExtractor
145
+ predictor: MaskRCNNC4Predictor
146
+ pooler_resolution: 14
147
+ pooler_sampling_ratio: 0
148
+ pooler_scales:
149
+ - 0.0625
150
+ mlp_head_dim: 1024
151
+ conv_layers:
152
+ - 256
153
+ - 256
154
+ - 256
155
+ - 256
156
+ resolution: 14
157
+ share_box_feature_extractor: true
158
+ postprocess_masks: false
159
+ postprocess_masks_threshold: 0.5
160
+ dilation: 1
161
+ use_gn: false
162
+ roi_relation_head:
163
+ predictor: REACTPlusPlusPredictor
164
+ feature_extractor: P5SceneContextExtractor
165
+ use_union_features: true
166
+ use_spatial_features: true
167
+ use_union_features_inference: true
168
+ union_dropout: 0.0
169
+ max_pairs_inference: 0
170
+ textual_features_only: false
171
+ visual_features_only: false
172
+ logit_adjustment: false
173
+ logit_adjustment_tau: 0.3
174
+ pooling_all_levels: true
175
+ batch_size_per_image: 512
176
+ positive_fraction: 0.35
177
+ use_gt_box: false
178
+ use_gt_object_label: false
179
+ embed_dim: 200
180
+ context_dropout_rate: 0.2
181
+ context_hidden_dim: 512
182
+ context_pooling_dim: 4096
183
+ context_obj_layer: 1
184
+ context_rel_layer: 1
185
+ mlp_head_dim: 512
186
+ loss:
187
+ loss_type: BalancedLogitAdjustedLoss
188
+ beta: 0.999
189
+ gamma: 0.0
190
+ alpha: 0.25
191
+ fg_boost: 2.0
192
+ fg_weight: 1.0
193
+ label_smoothing_epsilon: 0.01
194
+ logit_adjustment_tau: 0.5
195
+ bg_discount: 0.4
196
+ ccl_weight: 0.1
197
+ decisive_margin: 2.0
198
+ poly_epsilon: 0.0
199
+ label_smoothing: 0.1
200
+ sampler_aux_loss_weight: 0.1
201
+ attn_entropy_weight: 0.01
202
+ offset_reg_weight: 0.005
203
+ containment_loss_weight: 0.02
204
+ num_classes: 51
205
+ decoder_depth: 1
206
+ transformer_depth: 1
207
+ num_rel_layers: 2
208
+ use_scene_context: true
209
+ use_geo_bias: true
210
+ use_cls_emb: true
211
+ use_geo_enc: true
212
+ max_pairs_per_img: 512
213
+ num_queries: 64
214
+ use_cross_attention: true
215
+ attn_type: standard
216
+ geometric_loss_weight: 0.0
217
+ num_sample_points: 6
218
+ num_sample_heads: 6
219
+ feature_strategy: multi_scale
220
+ use_rmsnorm: true
221
+ use_swiglu: true
222
+ clip_rel_path: ''
223
+ react_loss_weights:
224
+ l21_loss: 1.0
225
+ dist_loss2: 0.1
226
+ loss_dis: 0.5
227
+ transformer:
228
+ dropout_rate: 0.1
229
+ obj_layer: 4
230
+ rel_layer: 2
231
+ num_head: 8
232
+ inner_dim: 2048
233
+ key_dim: 64
234
+ val_dim: 64
235
+ squat_module:
236
+ pre_norm: false
237
+ num_decoder: 3
238
+ rho: 0.35
239
+ beta: 0.7
240
+ pretrain_mask: false
241
+ pretrain_mask_epoch: 1
242
+ causal:
243
+ effect_analysis: false
244
+ fusion_type: sum
245
+ context_layer: motifs
246
+ separate_spatial: false
247
+ effect_type: none
248
+ spatial_for_vision: false
249
+ label_smoothing_loss: false
250
+ use_frequency_bias: false
251
+ require_box_overlap: false
252
+ num_sample_per_gt_rel: 8
253
+ add_gtbox_to_proposal_in_train: true
254
+ classifier: linear
255
+ predict_use_vision: false
256
+ use_bg_discounting: false
257
+ bg_discounting_threshold: 0.1
258
+ resnets:
259
+ num_groups: 1
260
+ width_per_group: 64
261
+ stride_in_1x1: true
262
+ trans_func: BottleneckWithFixedBatchNorm
263
+ stem_func: StemWithFixedBatchNorm
264
+ res5_dilation: 1
265
+ backbone_out_channels: 1024
266
+ res2_out_channels: 256
267
+ stem_out_channels: 64
268
+ solver:
269
+ max_iter: 0
270
+ max_epoch: 20
271
+ base_lr: 0.0002
272
+ bias_lr_factor: 1
273
+ momentum: 0.9
274
+ weight_decay: 0.05
275
+ weight_decay_bias: 0.0
276
+ clip_norm: 5.0
277
+ gamma: 0.5
278
+ steps:
279
+ - 41000
280
+ - 50000
281
+ warmup_factor: 0.1
282
+ warmup_epochs: 2
283
+ warmup_method: linear
284
+ checkpoint_period: 500
285
+ grad_norm_clip: 1.0
286
+ print_grad_freq: 500
287
+ to_val: true
288
+ pre_val: true
289
+ val_period: 500
290
+ update_schedule_during_load: false
291
+ ims_per_batch: 16
292
+ optimizer: ADAMW
293
+ slow_ratio: 10.0
294
+ deform_offset_slow_ratio: 1.0
295
+ muon_scaling: 0.2
296
+ adamw_scaling: 0.8
297
+ schedule:
298
+ type: WarmupCosineAnnealingIterLR
299
+ patience: 2
300
+ threshold: 0.0001
301
+ cooldown: 1
302
+ factor: 0.5
303
+ max_decay_step: 7
304
+ eta_min: 5.0e-07
305
+ plateau_epochs: 5
306
+ accum_steps: 2
307
+ test:
308
+ expected_results: []
309
+ expected_results_sigma_tol: 4
310
+ ims_per_batch: 1
311
+ detections_per_img: 100
312
+ informative: false
313
+ bbox_aug:
314
+ enabled: false
315
+ h_flip: false
316
+ scales: []
317
+ max_size: 4000
318
+ scale_h_flip: false
319
+ save_proposals: false
320
+ relation:
321
+ multiple_preds: false
322
+ iou_threshold: 0.5
323
+ require_overlap: false
324
+ later_nms_prediction_thres: 0.5
325
+ sync_gather: true
326
+ allow_load_from_cache: false
327
+ top_k: 100
328
+ custum_eval: false
329
+ custum_path: ''
330
+ global_setting:
331
+ basic_encoder: Cross-Attention
332
+ gcl_setting:
333
+ group_split_mode: divide4
334
+ knowledge_transfer_mode: KL_logit_TopDown
335
+ no_relation_restrain: false
336
+ zero_label_padding_mode: false
337
+ knowledge_loss_coefficient: 1.0