---
# Training / model configuration for the camera-based `corpbevt` model.
#
# NOTE(review): this document was reconstructed from a copy whose line breaks
# and indentation had been lost. Key order, values, anchors and aliases are
# preserved; the nesting below was inferred from key order and the original
# in-line comments -- confirm it against the code that consumes this config.

name: fax  # only used for demonstration data api
root_dir: '/data/s2/semantic-opv2v/train'
validate_dir: '/data/s2/semantic-opv2v/test'

train_params:
  batch_size: &batch_size 1
  epoches: &epoches 71
  eval_freq: 5
  save_freq: 5
  max_cav: &max_cav 5
  visible: true

fusion:
  # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
  core_method: 'CamIntermediateFusionDataset'
  args: []

data_augment: []
add_data_extension:
  - 'bev_dynamic.png'
  - 'bev_static.png'
  - 'bev_lane.png'
  - 'bev_visibility.png'
  - 'bev_visibility_corp.png'

# preprocess-related
preprocess:
  # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
  core_method: 'RgbPreprocessor'
  args:
    bgr2rgb: true
    resize_x: &image_width 512
    resize_y: &image_height 512
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  # object evaluation range
  cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]

# anchor box related
postprocess:
  # VoxelPostprocessor, BevPostprocessor supported
  core_method: 'CameraBevPostprocessor'
  anchor_args:
    cav_lidar_range: *cav_lidar
  order: 'hwl'  # hwl or lwh
  # Maximum number of objects in a single frame. Use this number to make sure
  # different frames have the same dimension in the same batch.
  max_num: 100
  nms_thresh: 0.15

model:
  core_method: corpbevt
  args:
    target: &target 'dynamic'  # dynamic, static or both
    max_cav: *max_cav

    encoder:
      num_layers: 34
      pretrained: true
      image_width: *image_width
      image_height: *image_height
      id_pick: [1, 2, 3]

    # compression rate (alternatives left commented out by the author:
    # 0.2, 2, 0, 64)
    compression: 8

    decoder:
      input_dim: 128
      num_layer: 3
      num_ch_dec: &decoder_block [32, 64, 128]

    fax:
      dim: [128, 128, 128]  # b, d, h w from resnet -> b 256 h w
      middle: [2, 2, 2]  # middle conv

      bev_embedding:
        sigma: 1.0
        bev_height: 256
        bev_width: 256
        h_meters: 100
        w_meters: 100
        offset: 0.0
        upsample_scales: [2, 4, 8]

      # cross_view attention
      cross_view:
        image_height: *image_height
        image_width: *image_width
        no_image_features: false
        skip: true
        heads: [4, 4, 4]
        dim_head: [32, 32, 32]
        qkv_bias: true

      cross_view_swap:
        rel_pos_emb: false
        q_win_size: [[16, 16], [16, 16], [32, 32]]
        feat_win_size: [[8, 8], [8, 8], [16, 16]]
        bev_embedding_flag: [true, false, false]

      self_attn:
        dim_head: 32
        dropout: 0.1
        window_size: 32

    sttf: &sttf
      resolution: 0.390625  # m/pixel
      downsample_rate: 8
      use_roi_mask: true

    fax_fusion:
      input_dim: 128
      mlp_dim: 256
      agent_size: *max_cav
      window_size: 8
      dim_head: 32
      drop_out: 0.1
      depth: 3
      mask: true

    seg_head_dim: 32
    output_class: 2

loss:
  core_method: vanilla_seg_loss
  args:
    target: *target
    d_weights: 75.0
    s_weights: 15.0
    d_coe: 2.0
    s_coe: 0.0

# Exponent floats below carry an explicit decimal point (2.0e-4, not 2e-4) so
# that YAML 1.1 loaders such as PyYAML resolve them as floats, not strings.
optimizer:
  core_method: AdamW
  lr: 2.0e-4
  args:
    eps: 1.0e-10
    weight_decay: 1.0e-2

lr_scheduler:
  # step, multistep, Exponential and cosineannealwarm supported
  core_method: cosineannealwarm
  epoches: *epoches
  warmup_lr: 2.0e-5
  warmup_epoches: 10
  lr_min: 5.0e-6