maelic
/

REACTPlusPlus_PSG

@@ -1,348 +0,0 @@
-seed: 42
-metric_to_track: mR
-dtype: float32
-output_dir: ./checkpoints/PSG/react++_yolov8m
-glove_dir: /mimer/NOBACKUP/groups/naiss2026-4-349/sgg_benchmark_develop/datasets/
-verbose: INFO
-paths_catalog: ''
-paths_data: ''
-input:
-  min_size_train: 640
-  max_size_train: 640
-  min_size_test: 640
-  max_size_test: 640
-  pixel_mean:
-  - 102.9801
-  - 115.9465
-  - 122.7717
-  pixel_std:
-  - 1.0
-  - 1.0
-  - 1.0
-  to_bgr255: true
-  flip_prob_train: 0.5
-  padding: true
-  brightness: 0.15
-  contrast: 0.15
-  saturation: 0.1
-  hue: 0.0
-  vertical_flip_prob_train: 0.0
-datasets:
-  train:
-  - custom_dataset_train
-  val:
-  - custom_dataset_val
-  test:
-  - custom_dataset_test
-  name: custom_dataset
-  type: ''
-  path: ''
-  classes: []
-  catalog:
-    custom_dataset:
-      data_dir: /mimer/NOBACKUP/groups/naiss2026-4-349/DATASETS/PSG_coco_format/
-dataloader:
-  num_workers: 8
-  size_divisibility: 32
-  aspect_ratio_grouping: true
-model:
-  flip_aug: false
-  rpn_only: false
-  mask_on: false
-  attribute_on: false
-  relation_on: true
-  device: cuda
-  meta_architecture: GeneralizedYOLO
-  cls_agnostic_bbox_reg: false
-  weight: ''
-  pretrained_detector_ckpt: /mimer/NOBACKUP/groups/naiss2026-4-349/sgg_benchmark_develop/checkpoints/BACKBONES/yolov8m_psg.pt
-  text_embedding: glove.6B
-  box_head: false
-  backbone:
-    type: yolo
-    extra_config: ''
-    freeze_conv_body_at: 2
-    nms_thresh: 0.001
-    freeze: true
-    freeze_at: 10
-  fpn:
-    use_gn: false
-    use_relu: false
-  group_norm:
-    dim_per_gp: -1
-    num_groups: 32
-    epsilon: 1.0e-05
-  yolo:
-    weights: ''
-    size: yolov8m
-    img_size: 640
-    out_channels:
-    - 192
-    - 384
-    - 576
-  rpn:
-    use_fpn: false
-    rpn_mid_channel: 512
-    anchor_sizes:
-    - 32
-    - 64
-    - 128
-    - 256
-    - 512
-    anchor_stride:
-    - 16
-    aspect_ratios:
-    - 0.5
-    - 1.0
-    - 2.0
-    straddle_thresh: 0
-    fg_iou_threshold: 0.7
-    bg_iou_threshold: 0.3
-    batch_size_per_image: 256
-    positive_fraction: 0.5
-    pre_nms_top_n_train: 12000
-    pre_nms_top_n_test: 6000
-    post_nms_top_n_train: 2000
-    post_nms_top_n_test: 1000
-    min_size: 0
-    fpn_post_nms_top_n_train: 2000
-    fpn_post_nms_top_n_test: 2000
-    fpn_post_nms_per_batch: true
-    rpn_head: SingleConvRPNHead
-  roi_heads:
-    fg_iou_threshold: 0.35
-    bg_iou_threshold: 0.3
-    bbox_reg_weights:
-    - 10.0
-    - 10.0
-    - 5.0
-    - 5.0
-    batch_size_per_image: 256
-    positive_fraction: 0.25
-    score_thresh: 0.01
-    nms: 0.5
-    post_nms_per_cls_topn: 300
-    nms_filter_duplicates: false
-    detections_per_img: 100
-  roi_box_head:
-    feature_extractor: FeatIdxBoxFeatureExtractor
-    predictor: FastRCNNPredictor
-    pooler_resolution: 14
-    pooler_sampling_ratio: 0
-    pooler_scales:
-    - 0.0625
-    mlp_head_dim: 256
-    use_gn: false
-    dilation: 1
-    conv_head_dim: 256
-    num_stacked_convs: 4
-    num_classes: 134
-    patch_size: 32
-    feat_idx_multiscale: true
-    feat_idx_neighbors: 1
-  roi_attribute_head:
-    feature_extractor: FPN2MLPFeatureExtractor
-    predictor: FPNPredictor
-    share_box_feature_extractor: true
-    use_binary_loss: true
-    attribute_loss_weight: 0.1
-    num_attributes: 201
-    max_attributes: 10
-    attribute_bgfg_sample: true
-    attribute_bgfg_ratio: 3
-    pos_weight: 5.0
-  roi_mask_head:
-    feature_extractor: ResNet50Conv5ROIFeatureExtractor
-    predictor: MaskRCNNC4Predictor
-    pooler_resolution: 14
-    pooler_sampling_ratio: 0
-    pooler_scales:
-    - 0.0625
-    mlp_head_dim: 1024
-    conv_layers:
-    - 256
-    - 256
-    - 256
-    - 256
-    resolution: 14
-    share_box_feature_extractor: true
-    postprocess_masks: false
-    postprocess_masks_threshold: 0.5
-    dilation: 1
-    use_gn: false
-  roi_relation_head:
-    predictor: REACTPlusPlusPredictor
-    feature_extractor: P5SceneContextExtractor
-    use_union_features: true
-    use_spatial_features: true
-    use_union_features_inference: true
-    union_dropout: 0.0
-    max_pairs_inference: 0
-    textual_features_only: false
-    visual_features_only: false
-    logit_adjustment: false
-    logit_adjustment_tau: 0.3
-    pooling_all_levels: true
-    batch_size_per_image: 512
-    positive_fraction: 0.35
-    use_gt_box: false
-    use_gt_object_label: false
-    embed_dim: 200
-    context_dropout_rate: 0.2
-    context_hidden_dim: 512
-    context_pooling_dim: 4096
-    context_obj_layer: 1
-    context_rel_layer: 1
-    mlp_head_dim: 512
-    loss:
-      loss_type: BalancedLogitAdjustedLoss
-      beta: 0.999
-      gamma: 0.0
-      alpha: 0.15
-      fg_boost: 2.0
-      fg_weight: 1.0
-      label_smoothing_epsilon: 0.01
-      logit_adjustment_tau: 0.5
-      bg_discount: 0.3
-      ccl_weight: 0.1
-      decisive_margin: 2.0
-      poly_epsilon: 0.0
-      label_smoothing: 0.1
-      sampler_aux_loss_weight: 0.1
-      attn_entropy_weight: 0.01
-      offset_reg_weight: 0.005
-      containment_loss_weight: 0.02
-    num_classes: 57
-    decoder_depth: 1
-    transformer_depth: 1
-    num_rel_layers: 2
-    use_scene_context: true
-    use_geo_bias: true
-    use_cls_emb: true
-    use_geo_enc: true
-    max_pairs_per_img: 512
-    num_queries: 64
-    use_cross_attention: true
-    attn_type: standard
-    geometric_loss_weight: 0.0
-    num_sample_points: 6
-    num_sample_heads: 6
-    feature_strategy: multi_scale
-    use_rmsnorm: true
-    use_swiglu: true
-    clip_rel_path: ''
-    react_loss_weights:
-      l21_loss: 1.0
-      dist_loss2: 0.1
-      loss_dis: 0.5
-    transformer:
-      dropout_rate: 0.1
-      obj_layer: 4
-      rel_layer: 2
-      num_head: 8
-      inner_dim: 2048
-      key_dim: 64
-      val_dim: 64
-    squat_module:
-      pre_norm: false
-      num_decoder: 3
-      rho: 0.35
-      beta: 0.7
-      pretrain_mask: false
-      pretrain_mask_epoch: 1
-    causal:
-      effect_analysis: false
-      fusion_type: sum
-      context_layer: motifs
-      separate_spatial: false
-      effect_type: none
-      spatial_for_vision: false
-    label_smoothing_loss: false
-    use_frequency_bias: false
-    require_box_overlap: false
-    num_sample_per_gt_rel: 8
-    add_gtbox_to_proposal_in_train: false
-    classifier: linear
-    predict_use_vision: false
-    use_bg_discounting: false
-    bg_discounting_threshold: 0.1
-  resnets:
-    num_groups: 1
-    width_per_group: 64
-    stride_in_1x1: true
-    trans_func: BottleneckWithFixedBatchNorm
-    stem_func: StemWithFixedBatchNorm
-    res5_dilation: 1
-    backbone_out_channels: 1024
-    res2_out_channels: 256
-    stem_out_channels: 64
-solver:
-  max_iter: 0
-  max_epoch: 10
-  base_lr: 0.0001
-  bias_lr_factor: 1
-  momentum: 0.9
-  weight_decay: 0.05
-  weight_decay_bias: 0.0
-  clip_norm: 5.0
-  gamma: 0.5
-  steps:
-  - 41000
-  - 50000
-  warmup_factor: 0.1
-  warmup_epochs: 1
-  warmup_method: linear
-  checkpoint_period: 250
-  grad_norm_clip: 1.0
-  print_grad_freq: 250
-  to_val: true
-  pre_val: true
-  val_period: 250
-  update_schedule_during_load: false
-  ims_per_batch: 8
-  optimizer: ADAMW
-  slow_ratio: 10.0
-  deform_offset_slow_ratio: 1.0
-  muon_scaling: 0.2
-  adamw_scaling: 0.8
-  schedule:
-    type: WarmupCosineAnnealingIterLR
-    patience: 2
-    threshold: 0.0001
-    cooldown: 1
-    factor: 0.5
-    max_decay_step: 7
-    eta_min: 5.0e-07
-    plateau_epochs: 5
-  accum_steps: 4
-test:
-  expected_results: []
-  expected_results_sigma_tol: 4
-  ims_per_batch: 1
-  detections_per_img: 100
-  informative: false
-  bbox_aug:
-    enabled: false
-    h_flip: false
-    scales: []
-    max_size: 4000
-    scale_h_flip: false
-  save_proposals: false
-  relation:
-    multiple_preds: false
-    iou_threshold: 0.5
-    require_overlap: false
-    later_nms_prediction_thres: 0.5
-    sync_gather: true
-  allow_load_from_cache: false
-  top_k: 100
-  custum_eval: false
-  custum_path: ''
-global_setting:
-  basic_encoder: Cross-Attention
-  gcl_setting:
-    group_split_mode: divide4
-    knowledge_transfer_mode: KL_logit_TopDown
-    no_relation_restrain: false
-    zero_label_padding_mode: false
-    knowledge_loss_coefficient: 1.0