# Uchihadj
# /
#
# Hyper-V2X
#
# autonomous-driving
#
# cooperative-perception
#
# uncertainty-estimation
#
# Model card Files Files and versions
#
# Hyper-V2X / compression_exp /compression_8 /config.yaml
#
# Uchihadj's picture
#
# Upload 10 files
#
# da614ce verified 1 day ago
#
# history blame contribute delete
#
# 3.32 kB

	name: fax # only used for demonstration data api
	root_dir: '/data/s2/semantic-opv2v/train'
	validate_dir: '/data/s2/semantic-opv2v/test'


	train_params:
	batch_size: &batch_size 1
	epoches: &epoches 71
	eval_freq: 5
	save_freq: 5
	max_cav: &max_cav 5
	visible: true


	fusion:
	core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
	args: []


	data_augment: []
	add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']

	# preprocess-related
	preprocess:
	# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
	core_method: 'RgbPreprocessor'
	args:
	bgr2rgb: true
	resize_x: &image_width 512
	resize_y: &image_height 512
	mean: [0.485, 0.456, 0.406]
	std: [0.229, 0.224, 0.225]
	# object evaluation range
	cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]


	# anchor box related
	postprocess:
	core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
	anchor_args:
	cav_lidar_range: *cav_lidar
	order: 'hwl' # hwl or lwh
	max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch
	nms_thresh: 0.15

	model:
	core_method: corpbevt
	args:
	target: &target 'dynamic' #'dynamic' dynamic, static or both
	max_cav: *max_cav
	encoder:
	num_layers: 34
	pretrained: true
	image_width: *image_width
	image_height: *image_height
	id_pick: [1, 2, 3]

	compression: 8 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate

	decoder:
	input_dim: 128
	num_layer: 3
	num_ch_dec: &decoder_block [32, 64, 128]

	fax:
	dim: [128, 128, 128] # b, d, h w from resenet -> b 256 h w
	middle: [2, 2, 2] # middle conv
	bev_embedding:
	sigma: 1.0
	bev_height: 256
	bev_width: 256
	h_meters: 100
	w_meters: 100
	offset: 0.0
	upsample_scales: [2, 4, 8]

	cross_view: #cross_view attention
	image_height: *image_height
	image_width: *image_width
	no_image_features: False
	skip: True
	heads: [4, 4, 4]
	dim_head: [32, 32, 32]
	qkv_bias: True

	cross_view_swap:
	rel_pos_emb: False
	q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
	feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
	bev_embedding_flag: [ true, false, false ]

	self_attn:
	dim_head: 32
	dropout: 0.1
	window_size: 32

	sttf: &sttf
	resolution: 0.390625 # m/pixel
	downsample_rate: 8
	use_roi_mask: true

	fax_fusion:
	input_dim: 128
	mlp_dim: 256
	agent_size: *max_cav
	window_size: 8
	dim_head: 32
	drop_out: 0.1
	depth: 3
	mask: true


	seg_head_dim: 32
	output_class: 2

	loss:
	core_method: vanilla_seg_loss
	args:
	target: *target
	d_weights: 75.0
	s_weights: 15.0
	d_coe: 2.0
	s_coe: 0.0

	optimizer:
	core_method: AdamW
	lr: 2e-4
	args:
	eps: 1e-10
	weight_decay: 1e-2

	lr_scheduler:
	core_method: cosineannealwarm #step, multistep, Exponential and cosineannealwarm support
	epoches: *epoches
	warmup_lr: 2e-5
	warmup_epoches: 10
	lr_min: 5e-6