# Hosting-page residue (not part of the config): "Uchihadj's picture",
# "Upload 10 files", commit da614ce (verified).
# Experiment name and dataset locations.
name: fax # only used for demonstration data api
# Absolute, machine-specific path to the training split.
root_dir: '/data/s2/semantic-opv2v/train'
# NOTE(review): validation reads from the `test` split — confirm this is intended.
validate_dir: '/data/s2/semantic-opv2v/test'
# Training-loop settings. `&max_cav` and `&epoches` are aliased further down
# (model args / fax_fusion and lr_scheduler respectively).
train_params:
  batch_size: &batch_size 1
  epoches: &epoches 71
  # NOTE(review): eval/save frequencies are presumably counted in epochs — confirm.
  eval_freq: 5
  save_freq: 5
  max_cav: &max_cav 5
  visible: true
# Fusion dataset selection.
fusion:
  # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
  core_method: 'CamIntermediateFusionDataset'
  args: []
# Empty list: no augmentation entries are configured.
data_augment: []
# File names listed as additional data extensions.
# NOTE(review): presumably extra per-frame BEV maps read by the dataset loader — confirm.
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
# preprocess-related
preprocess:
  # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
  core_method: 'RgbPreprocessor'
  args:
    bgr2rgb: true
    # The width/height anchors are aliased by the model encoder and cross_view.
    resize_x: &image_width 512
    resize_y: &image_height 512
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  # object evaluation range (aliased by postprocess.anchor_args)
  # NOTE(review): placed beside `args`, not inside it — nesting was inferred
  # from a flattened copy; confirm against the preprocessor's config reader.
  cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
# anchor box related
postprocess:
  # VoxelPostprocessor, BevPostprocessor supported
  core_method: 'CameraBevPostprocessor'
  anchor_args:
    cav_lidar_range: *cav_lidar
  # NOTE(review): the three keys below are placed at section level rather than
  # inside `anchor_args` — nesting was inferred from a flattened copy; confirm.
  order: 'hwl'  # hwl or lwh
  # Maximum number of objects in a single frame. Use this number to make sure
  # different frames have the same dimension in the same batch.
  max_num: 100
  nms_thresh: 0.15
# Model definition.
# NOTE(review): the nesting below was rebuilt from a copy whose indentation had
# been stripped; confirm the layout against the code that reads `model.args`.
model:
  core_method: corpbevt
  args:
    target: &target 'dynamic'  # dynamic, static or both
    max_cav: *max_cav

    encoder:
      num_layers: 34
      pretrained: true
      image_width: *image_width
      image_height: *image_height
      id_pick: [1, 2, 3]

    # Compression rate. Other values left in the original comment: 0.2, 2, 0, 64.
    # NOTE(review): assumed to sit beside `encoder`, not inside it — confirm.
    compression: 8

    decoder:
      input_dim: 128
      num_layer: 3
      num_ch_dec: &decoder_block [32, 64, 128]

    fax:
      # b, d, h w from resnet -> b 256 h w
      # NOTE(review): the comment mentions 256 while every entry here is 128.
      dim: [128, 128, 128]
      middle: [2, 2, 2]  # middle conv

      bev_embedding:
        sigma: 1.0
        bev_height: 256
        bev_width: 256
        h_meters: 100
        w_meters: 100
        offset: 0.0
        upsample_scales: [2, 4, 8]

      cross_view:  # cross_view attention
        image_height: *image_height
        image_width: *image_width
        no_image_features: false
        skip: true
        heads: [4, 4, 4]
        dim_head: [32, 32, 32]
        qkv_bias: true

      cross_view_swap:
        rel_pos_emb: false
        q_win_size: [[16, 16], [16, 16], [32, 32]]
        feat_win_size: [[8, 8], [8, 8], [16, 16]]
        bev_embedding_flag: [true, false, false]

      self_attn:
        dim_head: 32
        dropout: 0.1
        window_size: 32

    sttf: &sttf
      # m/pixel; equals h_meters / bev_height = 100 / 256.
      resolution: 0.390625
      downsample_rate: 8
      use_roi_mask: true

    fax_fusion:
      input_dim: 128
      mlp_dim: 256
      agent_size: *max_cav
      window_size: 8
      dim_head: 32
      drop_out: 0.1
      depth: 3
      mask: true

    seg_head_dim: 32
    output_class: 2
# Segmentation loss. `target` aliases the value anchored in the model args.
loss:
  core_method: vanilla_seg_loss
  args:
    target: *target
    # NOTE(review): the d_/s_ prefixes presumably mean dynamic/static, with
    # s_coe: 0.0 switching the static term off — confirm in the loss code.
    d_weights: 75.0
    s_weights: 15.0
    d_coe: 2.0
    s_coe: 0.0
# Optimizer settings.
# Floats are written with an explicit decimal point: YAML 1.1 resolvers (stock
# PyYAML included) read a bare `2e-4` as a string, not a float.
optimizer:
  core_method: AdamW
  lr: 2.0e-4
  args:
    eps: 1.0e-10
    weight_decay: 1.0e-2
# Learning-rate schedule. `epoches` aliases train_params.epoches.
# Floats are written with an explicit decimal point: YAML 1.1 resolvers (stock
# PyYAML included) read a bare `2e-5` as a string, not a float.
lr_scheduler:
  # step, multistep, Exponential and cosineannealwarm are supported
  core_method: cosineannealwarm
  epoches: *epoches
  warmup_lr: 2.0e-5
  warmup_epoches: 10
  lr_min: 5.0e-6