maelic committed on
Commit
fa89a5b
·
verified ·
1 Parent(s): 14b8c8e

Upload yolo12l/hydra_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. yolo12l/hydra_config.yaml +348 -0
yolo12l/hydra_config.yaml ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 42
2
+ metric_to_track: mR
3
+ dtype: float32
4
+ output_dir: ./checkpoints/PSG/react++_yolo12l
5
+ glove_dir: /mimer/NOBACKUP/groups/naiss2026-4-349/sgg_benchmark_develop/datasets/
6
+ verbose: INFO
7
+ paths_catalog: ''
8
+ paths_data: ''
9
+ input:
10
+ min_size_train: 640
11
+ max_size_train: 640
12
+ min_size_test: 640
13
+ max_size_test: 640
14
+ pixel_mean:
15
+ - 102.9801
16
+ - 115.9465
17
+ - 122.7717
18
+ pixel_std:
19
+ - 1.0
20
+ - 1.0
21
+ - 1.0
22
+ to_bgr255: true
23
+ flip_prob_train: 0.5
24
+ padding: true
25
+ brightness: 0.15
26
+ contrast: 0.15
27
+ saturation: 0.1
28
+ hue: 0.0
29
+ vertical_flip_prob_train: 0.0
30
+ datasets:
31
+ train:
32
+ - custom_dataset_train
33
+ val:
34
+ - custom_dataset_val
35
+ test:
36
+ - custom_dataset_test
37
+ name: custom_dataset
38
+ type: ''
39
+ path: ''
40
+ classes: []
41
+ catalog:
42
+ custom_dataset:
43
+ data_dir: /mimer/NOBACKUP/groups/naiss2026-4-349/DATASETS/PSG_coco_format/
44
+ dataloader:
45
+ num_workers: 8
46
+ size_divisibility: 32
47
+ aspect_ratio_grouping: true
48
+ model:
49
+ flip_aug: false
50
+ rpn_only: false
51
+ mask_on: false
52
+ attribute_on: false
53
+ relation_on: true
54
+ device: cuda
55
+ meta_architecture: GeneralizedYOLO
56
+ cls_agnostic_bbox_reg: false
57
+ weight: ''
58
+ pretrained_detector_ckpt: /mimer/NOBACKUP/groups/naiss2026-4-349/sgg_benchmark_develop/checkpoints/BACKBONES/last.pt
59
+ text_embedding: glove.6B
60
+ box_head: false
61
+ backbone:
62
+ type: yolo
63
+ extra_config: ''
64
+ freeze_conv_body_at: 2
65
+ nms_thresh: 0.001
66
+ freeze: true
67
+ freeze_at: 10
68
+ fpn:
69
+ use_gn: false
70
+ use_relu: false
71
+ group_norm:
72
+ dim_per_gp: -1
73
+ num_groups: 32
74
+ epsilon: 1.0e-05
75
+ yolo:
76
+ weights: ''
77
+ size: yolo12l
78
+ img_size: 640
79
+ out_channels:
80
+ - 256
81
+ - 512
82
+ - 512
83
+ rpn:
84
+ use_fpn: false
85
+ rpn_mid_channel: 512
86
+ anchor_sizes:
87
+ - 32
88
+ - 64
89
+ - 128
90
+ - 256
91
+ - 512
92
+ anchor_stride:
93
+ - 16
94
+ aspect_ratios:
95
+ - 0.5
96
+ - 1.0
97
+ - 2.0
98
+ straddle_thresh: 0
99
+ fg_iou_threshold: 0.7
100
+ bg_iou_threshold: 0.3
101
+ batch_size_per_image: 256
102
+ positive_fraction: 0.5
103
+ pre_nms_top_n_train: 12000
104
+ pre_nms_top_n_test: 6000
105
+ post_nms_top_n_train: 2000
106
+ post_nms_top_n_test: 1000
107
+ min_size: 0
108
+ fpn_post_nms_top_n_train: 2000
109
+ fpn_post_nms_top_n_test: 2000
110
+ fpn_post_nms_per_batch: true
111
+ rpn_head: SingleConvRPNHead
112
+ roi_heads:
113
+ fg_iou_threshold: 0.35
114
+ bg_iou_threshold: 0.3
115
+ bbox_reg_weights:
116
+ - 10.0
117
+ - 10.0
118
+ - 5.0
119
+ - 5.0
120
+ batch_size_per_image: 256
121
+ positive_fraction: 0.25
122
+ score_thresh: 0.01
123
+ nms: 0.5
124
+ post_nms_per_cls_topn: 300
125
+ nms_filter_duplicates: false
126
+ detections_per_img: 100
127
+ roi_box_head:
128
+ feature_extractor: FeatIdxBoxFeatureExtractor
129
+ predictor: FastRCNNPredictor
130
+ pooler_resolution: 14
131
+ pooler_sampling_ratio: 0
132
+ pooler_scales:
133
+ - 0.0625
134
+ mlp_head_dim: 256
135
+ use_gn: false
136
+ dilation: 1
137
+ conv_head_dim: 256
138
+ num_stacked_convs: 4
139
+ num_classes: 134
140
+ patch_size: 32
141
+ feat_idx_multiscale: true
142
+ feat_idx_neighbors: 1
143
+ roi_attribute_head:
144
+ feature_extractor: FPN2MLPFeatureExtractor
145
+ predictor: FPNPredictor
146
+ share_box_feature_extractor: true
147
+ use_binary_loss: true
148
+ attribute_loss_weight: 0.1
149
+ num_attributes: 201
150
+ max_attributes: 10
151
+ attribute_bgfg_sample: true
152
+ attribute_bgfg_ratio: 3
153
+ pos_weight: 5.0
154
+ roi_mask_head:
155
+ feature_extractor: ResNet50Conv5ROIFeatureExtractor
156
+ predictor: MaskRCNNC4Predictor
157
+ pooler_resolution: 14
158
+ pooler_sampling_ratio: 0
159
+ pooler_scales:
160
+ - 0.0625
161
+ mlp_head_dim: 1024
162
+ conv_layers:
163
+ - 256
164
+ - 256
165
+ - 256
166
+ - 256
167
+ resolution: 14
168
+ share_box_feature_extractor: true
169
+ postprocess_masks: false
170
+ postprocess_masks_threshold: 0.5
171
+ dilation: 1
172
+ use_gn: false
173
+ roi_relation_head:
174
+ predictor: REACTPlusPlusPredictor
175
+ feature_extractor: P5SceneContextExtractor
176
+ use_union_features: true
177
+ use_spatial_features: true
178
+ use_union_features_inference: true
179
+ union_dropout: 0.0
180
+ max_pairs_inference: 0
181
+ textual_features_only: false
182
+ visual_features_only: false
183
+ logit_adjustment: false
184
+ logit_adjustment_tau: 0.3
185
+ pooling_all_levels: true
186
+ batch_size_per_image: 512
187
+ positive_fraction: 0.35
188
+ use_gt_box: false
189
+ use_gt_object_label: false
190
+ embed_dim: 200
191
+ context_dropout_rate: 0.2
192
+ context_hidden_dim: 512
193
+ context_pooling_dim: 4096
194
+ context_obj_layer: 1
195
+ context_rel_layer: 1
196
+ mlp_head_dim: 512
197
+ loss:
198
+ loss_type: BalancedLogitAdjustedLoss
199
+ beta: 0.999
200
+ gamma: 0.0
201
+ alpha: 0.15
202
+ fg_boost: 2.0
203
+ fg_weight: 1.0
204
+ label_smoothing_epsilon: 0.01
205
+ logit_adjustment_tau: 0.5
206
+ bg_discount: 0.3
207
+ ccl_weight: 0.1
208
+ decisive_margin: 2.0
209
+ poly_epsilon: 0.0
210
+ label_smoothing: 0.1
211
+ sampler_aux_loss_weight: 0.1
212
+ attn_entropy_weight: 0.01
213
+ offset_reg_weight: 0.005
214
+ containment_loss_weight: 0.02
215
+ num_classes: 57
216
+ decoder_depth: 1
217
+ transformer_depth: 1
218
+ num_rel_layers: 2
219
+ use_scene_context: true
220
+ use_geo_bias: true
221
+ use_cls_emb: true
222
+ use_geo_enc: true
223
+ max_pairs_per_img: 512
224
+ num_queries: 64
225
+ use_cross_attention: true
226
+ attn_type: standard
227
+ geometric_loss_weight: 0.0
228
+ num_sample_points: 6
229
+ num_sample_heads: 6
230
+ feature_strategy: multi_scale
231
+ use_rmsnorm: true
232
+ use_swiglu: true
233
+ clip_rel_path: ''
234
+ react_loss_weights:
235
+ l21_loss: 1.0
236
+ dist_loss2: 0.1
237
+ loss_dis: 0.5
238
+ transformer:
239
+ dropout_rate: 0.1
240
+ obj_layer: 4
241
+ rel_layer: 2
242
+ num_head: 8
243
+ inner_dim: 2048
244
+ key_dim: 64
245
+ val_dim: 64
246
+ squat_module:
247
+ pre_norm: false
248
+ num_decoder: 3
249
+ rho: 0.35
250
+ beta: 0.7
251
+ pretrain_mask: false
252
+ pretrain_mask_epoch: 1
253
+ causal:
254
+ effect_analysis: false
255
+ fusion_type: sum
256
+ context_layer: motifs
257
+ separate_spatial: false
258
+ effect_type: none
259
+ spatial_for_vision: false
260
+ label_smoothing_loss: false
261
+ use_frequency_bias: false
262
+ require_box_overlap: false
263
+ num_sample_per_gt_rel: 8
264
+ add_gtbox_to_proposal_in_train: false
265
+ classifier: linear
266
+ predict_use_vision: false
267
+ use_bg_discounting: false
268
+ bg_discounting_threshold: 0.1
269
+ resnets:
270
+ num_groups: 1
271
+ width_per_group: 64
272
+ stride_in_1x1: true
273
+ trans_func: BottleneckWithFixedBatchNorm
274
+ stem_func: StemWithFixedBatchNorm
275
+ res5_dilation: 1
276
+ backbone_out_channels: 1024
277
+ res2_out_channels: 256
278
+ stem_out_channels: 64
279
+ solver:
280
+ max_iter: 0
281
+ max_epoch: 10
282
+ base_lr: 0.0001
283
+ bias_lr_factor: 1
284
+ momentum: 0.9
285
+ weight_decay: 0.05
286
+ weight_decay_bias: 0.0
287
+ clip_norm: 5.0
288
+ gamma: 0.5
289
+ steps:
290
+ - 41000
291
+ - 50000
292
+ warmup_factor: 0.1
293
+ warmup_epochs: 1
294
+ warmup_method: linear
295
+ checkpoint_period: 250
296
+ grad_norm_clip: 1.0
297
+ print_grad_freq: 250
298
+ to_val: true
299
+ pre_val: true
300
+ val_period: 250
301
+ update_schedule_during_load: false
302
+ ims_per_batch: 8
303
+ optimizer: ADAMW
304
+ slow_ratio: 10.0
305
+ deform_offset_slow_ratio: 1.0
306
+ muon_scaling: 0.2
307
+ adamw_scaling: 0.8
308
+ schedule:
309
+ type: WarmupCosineAnnealingIterLR
310
+ patience: 2
311
+ threshold: 0.0001
312
+ cooldown: 1
313
+ factor: 0.5
314
+ max_decay_step: 7
315
+ eta_min: 5.0e-07
316
+ plateau_epochs: 5
317
+ accum_steps: 4
318
+ test:
319
+ expected_results: []
320
+ expected_results_sigma_tol: 4
321
+ ims_per_batch: 1
322
+ detections_per_img: 100
323
+ informative: false
324
+ bbox_aug:
325
+ enabled: false
326
+ h_flip: false
327
+ scales: []
328
+ max_size: 4000
329
+ scale_h_flip: false
330
+ save_proposals: false
331
+ relation:
332
+ multiple_preds: false
333
+ iou_threshold: 0.5
334
+ require_overlap: false
335
+ later_nms_prediction_thres: 0.5
336
+ sync_gather: true
337
+ allow_load_from_cache: false
338
+ top_k: 100
339
+ custum_eval: false
340
+ custum_path: ''
341
+ global_setting:
342
+ basic_encoder: Cross-Attention
343
+ gcl_setting:
344
+ group_split_mode: divide4
345
+ knowledge_transfer_mode: KL_logit_TopDown
346
+ no_relation_restrain: false
347
+ zero_label_padding_mode: false
348
+ knowledge_loss_coefficient: 1.0