Uchihadj committed on
Commit
da614ce
·
verified ·
1 Parent(s): 2c268d1

Upload 10 files

Browse files
compression_exp/compression_0/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fax # only used for demonstration data api
2
+ root_dir: '/data/s2/semantic-opv2v/train'
3
+ validate_dir: '/data/s2/semantic-opv2v/test'
4
+
5
+
6
+ train_params:
7
+ batch_size: &batch_size 1
8
+ epoches: &epoches 151
9
+ eval_freq: 5
10
+ save_freq: 5
11
+ max_cav: &max_cav 5
12
+ visible: true
13
+
14
+
15
+ fusion:
16
+ core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
17
+ args: []
18
+
19
+
20
+ data_augment: []
21
+ add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
22
+
23
+ # preprocess-related
24
+ preprocess:
25
+ # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
26
+ core_method: 'RgbPreprocessor'
27
+ args:
28
+ bgr2rgb: true
29
+ resize_x: &image_width 512
30
+ resize_y: &image_height 512
31
+ mean: [0.485, 0.456, 0.406]
32
+ std: [0.229, 0.224, 0.225]
33
+ # object evaluation range
34
+ cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
35
+
36
+
37
+ # anchor box related
38
+ postprocess:
39
+ core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
40
+ anchor_args:
41
+ cav_lidar_range: *cav_lidar
42
+ order: 'hwl' # hwl or lwh
43
+ max_num: 100 # maximum number of objects in a single frame; use this number to make sure different frames have the same dimension in the same batch
44
+ nms_thresh: 0.15
45
+
46
+ model:
47
+ core_method: corpbevt
48
+ args:
49
+ target: &target 'dynamic' # one of: dynamic, static or both
50
+ max_cav: *max_cav
51
+ encoder:
52
+ num_layers: 34
53
+ pretrained: true
54
+ image_width: *image_width
55
+ image_height: *image_height
56
+ id_pick: [1, 2, 3]
57
+
58
+ compression: 0 #64 #0 #8 #64 #0 # compression rate
59
+
60
+ decoder:
61
+ input_dim: 128
62
+ num_layer: 3
63
+ num_ch_dec: &decoder_block [32, 64, 128]
64
+
65
+ fax:
66
+ dim: [128, 128, 128] # b, d, h, w from resnet -> b 256 h w
67
+ middle: [2, 2, 2] # middle conv
68
+ bev_embedding:
69
+ sigma: 1.0
70
+ bev_height: 256
71
+ bev_width: 256
72
+ h_meters: 100
73
+ w_meters: 100
74
+ offset: 0.0
75
+ upsample_scales: [2, 4, 8]
76
+
77
+ cross_view: #cross_view attention
78
+ image_height: *image_height
79
+ image_width: *image_width
80
+ no_image_features: False
81
+ skip: True
82
+ heads: [4, 4, 4]
83
+ dim_head: [32, 32, 32]
84
+ qkv_bias: True
85
+
86
+ cross_view_swap:
87
+ rel_pos_emb: False
88
+ q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
89
+ feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
90
+ bev_embedding_flag: [ true, false, false ]
91
+
92
+ self_attn:
93
+ dim_head: 32
94
+ dropout: 0.1
95
+ window_size: 32
96
+
97
+ sttf: &sttf
98
+ resolution: 0.390625 # m/pixel
99
+ downsample_rate: 8
100
+ use_roi_mask: true
101
+
102
+ fax_fusion:
103
+ input_dim: 128
104
+ mlp_dim: 256
105
+ agent_size: *max_cav
106
+ window_size: 8
107
+ dim_head: 32
108
+ drop_out: 0.1
109
+ depth: 3
110
+ mask: true
111
+
112
+
113
+ seg_head_dim: 32
114
+ output_class: 2
115
+
116
+ loss:
117
+ core_method: vanilla_seg_loss
118
+ args:
119
+ target: *target
120
+ d_weights: 75.0
121
+ s_weights: 15.0
122
+ d_coe: 2.0
123
+ s_coe: 0.0
124
+
125
+ optimizer:
126
+ core_method: AdamW
127
+ lr: 2e-4
128
+ args:
129
+ eps: 1e-10
130
+ weight_decay: 1e-2
131
+
132
+ lr_scheduler:
133
+ core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
134
+ epoches: *epoches
135
+ warmup_lr: 2e-5
136
+ warmup_epoches: 10
137
+ lr_min: 5e-6
compression_exp/compression_0/net_epoch151.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af040f45bdf102e4a7776cfb684073bbf9aea74ba6b2abd24b0e9fdbcf7c0fa
3
+ size 113990249
compression_exp/compression_32/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fax # only used for demonstration data api
2
+ root_dir: '/data/s2/semantic-opv2v/train'
3
+ validate_dir: '/data/s2/semantic-opv2v/test'
4
+
5
+
6
+ train_params:
7
+ batch_size: &batch_size 1
8
+ epoches: &epoches 71
9
+ eval_freq: 5
10
+ save_freq: 5
11
+ max_cav: &max_cav 5
12
+ visible: true
13
+
14
+
15
+ fusion:
16
+ core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
17
+ args: []
18
+
19
+
20
+ data_augment: []
21
+ add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
22
+
23
+ # preprocess-related
24
+ preprocess:
25
+ # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
26
+ core_method: 'RgbPreprocessor'
27
+ args:
28
+ bgr2rgb: true
29
+ resize_x: &image_width 512
30
+ resize_y: &image_height 512
31
+ mean: [0.485, 0.456, 0.406]
32
+ std: [0.229, 0.224, 0.225]
33
+ # object evaluation range
34
+ cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
35
+
36
+
37
+ # anchor box related
38
+ postprocess:
39
+ core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
40
+ anchor_args:
41
+ cav_lidar_range: *cav_lidar
42
+ order: 'hwl' # hwl or lwh
43
+ max_num: 100 # maximum number of objects in a single frame; use this number to make sure different frames have the same dimension in the same batch
44
+ nms_thresh: 0.15
45
+
46
+ model:
47
+ core_method: corpbevt
48
+ args:
49
+ target: &target 'dynamic' # one of: dynamic, static or both
50
+ max_cav: *max_cav
51
+ encoder:
52
+ num_layers: 34
53
+ pretrained: true
54
+ image_width: *image_width
55
+ image_height: *image_height
56
+ id_pick: [1, 2, 3]
57
+
58
+ compression: 32 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
59
+
60
+ decoder:
61
+ input_dim: 128
62
+ num_layer: 3
63
+ num_ch_dec: &decoder_block [32, 64, 128]
64
+
65
+ fax:
66
+ dim: [128, 128, 128] # b, d, h, w from resnet -> b 256 h w
67
+ middle: [2, 2, 2] # middle conv
68
+ bev_embedding:
69
+ sigma: 1.0
70
+ bev_height: 256
71
+ bev_width: 256
72
+ h_meters: 100
73
+ w_meters: 100
74
+ offset: 0.0
75
+ upsample_scales: [2, 4, 8]
76
+
77
+ cross_view: #cross_view attention
78
+ image_height: *image_height
79
+ image_width: *image_width
80
+ no_image_features: False
81
+ skip: True
82
+ heads: [4, 4, 4]
83
+ dim_head: [32, 32, 32]
84
+ qkv_bias: True
85
+
86
+ cross_view_swap:
87
+ rel_pos_emb: False
88
+ q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
89
+ feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
90
+ bev_embedding_flag: [ true, false, false ]
91
+
92
+ self_attn:
93
+ dim_head: 32
94
+ dropout: 0.1
95
+ window_size: 32
96
+
97
+ sttf: &sttf
98
+ resolution: 0.390625 # m/pixel
99
+ downsample_rate: 8
100
+ use_roi_mask: true
101
+
102
+ fax_fusion:
103
+ input_dim: 128
104
+ mlp_dim: 256
105
+ agent_size: *max_cav
106
+ window_size: 8
107
+ dim_head: 32
108
+ drop_out: 0.1
109
+ depth: 3
110
+ mask: true
111
+
112
+
113
+ seg_head_dim: 32
114
+ output_class: 2
115
+
116
+ loss:
117
+ core_method: vanilla_seg_loss
118
+ args:
119
+ target: *target
120
+ d_weights: 75.0
121
+ s_weights: 15.0
122
+ d_coe: 2.0
123
+ s_coe: 0.0
124
+
125
+ optimizer:
126
+ core_method: AdamW
127
+ lr: 2e-4
128
+ args:
129
+ eps: 1e-10
130
+ weight_decay: 1e-2
131
+
132
+ lr_scheduler:
133
+ core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
134
+ epoches: *epoches
135
+ warmup_lr: 2e-5
136
+ warmup_epoches: 10
137
+ lr_min: 5e-6
compression_exp/compression_32/net_epoch71.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80dc7a5c5bce3bc40d8d1929111c5caadcf93318a9774246f90bea7bcd103f60
3
+ size 114596103
compression_exp/compression_4/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fax # only used for demonstration data api
2
+ root_dir: '/data/s2/semantic-opv2v/train'
3
+ validate_dir: '/data/s2/semantic-opv2v/test'
4
+
5
+
6
+ train_params:
7
+ batch_size: &batch_size 1
8
+ epoches: &epoches 71
9
+ eval_freq: 5
10
+ save_freq: 5
11
+ max_cav: &max_cav 5
12
+ visible: true
13
+
14
+
15
+ fusion:
16
+ core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
17
+ args: []
18
+
19
+
20
+ data_augment: []
21
+ add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
22
+
23
+ # preprocess-related
24
+ preprocess:
25
+ # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
26
+ core_method: 'RgbPreprocessor'
27
+ args:
28
+ bgr2rgb: true
29
+ resize_x: &image_width 512
30
+ resize_y: &image_height 512
31
+ mean: [0.485, 0.456, 0.406]
32
+ std: [0.229, 0.224, 0.225]
33
+ # object evaluation range
34
+ cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
35
+
36
+
37
+ # anchor box related
38
+ postprocess:
39
+ core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
40
+ anchor_args:
41
+ cav_lidar_range: *cav_lidar
42
+ order: 'hwl' # hwl or lwh
43
+ max_num: 100 # maximum number of objects in a single frame; use this number to make sure different frames have the same dimension in the same batch
44
+ nms_thresh: 0.15
45
+
46
+ model:
47
+ core_method: corpbevt
48
+ args:
49
+ target: &target 'dynamic' # one of: dynamic, static or both
50
+ max_cav: *max_cav
51
+ encoder:
52
+ num_layers: 34
53
+ pretrained: true
54
+ image_width: *image_width
55
+ image_height: *image_height
56
+ id_pick: [1, 2, 3]
57
+
58
+ compression: 4 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
59
+
60
+ decoder:
61
+ input_dim: 128
62
+ num_layer: 3
63
+ num_ch_dec: &decoder_block [32, 64, 128]
64
+
65
+ fax:
66
+ dim: [128, 128, 128] # b, d, h, w from resnet -> b 256 h w
67
+ middle: [2, 2, 2] # middle conv
68
+ bev_embedding:
69
+ sigma: 1.0
70
+ bev_height: 256
71
+ bev_width: 256
72
+ h_meters: 100
73
+ w_meters: 100
74
+ offset: 0.0
75
+ upsample_scales: [2, 4, 8]
76
+
77
+ cross_view: #cross_view attention
78
+ image_height: *image_height
79
+ image_width: *image_width
80
+ no_image_features: False
81
+ skip: True
82
+ heads: [4, 4, 4]
83
+ dim_head: [32, 32, 32]
84
+ qkv_bias: True
85
+
86
+ cross_view_swap:
87
+ rel_pos_emb: False
88
+ q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
89
+ feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
90
+ bev_embedding_flag: [ true, false, false ]
91
+
92
+ self_attn:
93
+ dim_head: 32
94
+ dropout: 0.1
95
+ window_size: 32
96
+
97
+ sttf: &sttf
98
+ resolution: 0.390625 # m/pixel
99
+ downsample_rate: 8
100
+ use_roi_mask: true
101
+
102
+ fax_fusion:
103
+ input_dim: 128
104
+ mlp_dim: 256
105
+ agent_size: *max_cav
106
+ window_size: 8
107
+ dim_head: 32
108
+ drop_out: 0.1
109
+ depth: 3
110
+ mask: true
111
+
112
+
113
+ seg_head_dim: 32
114
+ output_class: 2
115
+
116
+ loss:
117
+ core_method: vanilla_seg_loss
118
+ args:
119
+ target: *target
120
+ d_weights: 75.0
121
+ s_weights: 15.0
122
+ d_coe: 2.0
123
+ s_coe: 0.0
124
+
125
+ optimizer:
126
+ core_method: AdamW
127
+ lr: 2e-4
128
+ args:
129
+ eps: 1e-10
130
+ weight_decay: 1e-2
131
+
132
+ lr_scheduler:
133
+ core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
134
+ epoches: *epoches
135
+ warmup_lr: 2e-5
136
+ warmup_epoches: 10
137
+ lr_min: 5e-6
compression_exp/compression_4/net_epoch41.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744ab1d8267a02c87c0cf687ca522ae4686ba0729a8fda3f0773a554ed855278
3
+ size 114625415
compression_exp/compression_64/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fax # only used for demonstration data api
2
+ root_dir: '/data/s2/semantic-opv2v/train'
3
+ validate_dir: '/data/s2/semantic-opv2v/test'
4
+
5
+
6
+ train_params:
7
+ batch_size: &batch_size 1
8
+ epoches: &epoches 71
9
+ eval_freq: 5
10
+ save_freq: 5
11
+ max_cav: &max_cav 5
12
+ visible: true
13
+
14
+
15
+ fusion:
16
+ core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
17
+ args: []
18
+
19
+
20
+ data_augment: []
21
+ add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
22
+
23
+ # preprocess-related
24
+ preprocess:
25
+ # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
26
+ core_method: 'RgbPreprocessor'
27
+ args:
28
+ bgr2rgb: true
29
+ resize_x: &image_width 512
30
+ resize_y: &image_height 512
31
+ mean: [0.485, 0.456, 0.406]
32
+ std: [0.229, 0.224, 0.225]
33
+ # object evaluation range
34
+ cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
35
+
36
+
37
+ # anchor box related
38
+ postprocess:
39
+ core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
40
+ anchor_args:
41
+ cav_lidar_range: *cav_lidar
42
+ order: 'hwl' # hwl or lwh
43
+ max_num: 100 # maximum number of objects in a single frame; use this number to make sure different frames have the same dimension in the same batch
44
+ nms_thresh: 0.15
45
+
46
+ model:
47
+ core_method: corpbevt
48
+ args:
49
+ target: &target 'dynamic' # one of: dynamic, static or both
50
+ max_cav: *max_cav
51
+ encoder:
52
+ num_layers: 34
53
+ pretrained: true
54
+ image_width: *image_width
55
+ image_height: *image_height
56
+ id_pick: [1, 2, 3]
57
+
58
+ compression: 64 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
59
+
60
+ decoder:
61
+ input_dim: 128
62
+ num_layer: 3
63
+ num_ch_dec: &decoder_block [32, 64, 128]
64
+
65
+ fax:
66
+ dim: [128, 128, 128] # b, d, h, w from resnet -> b 256 h w
67
+ middle: [2, 2, 2] # middle conv
68
+ bev_embedding:
69
+ sigma: 1.0
70
+ bev_height: 256
71
+ bev_width: 256
72
+ h_meters: 100
73
+ w_meters: 100
74
+ offset: 0.0
75
+ upsample_scales: [2, 4, 8]
76
+
77
+ cross_view: #cross_view attention
78
+ image_height: *image_height
79
+ image_width: *image_width
80
+ no_image_features: False
81
+ skip: True
82
+ heads: [4, 4, 4]
83
+ dim_head: [32, 32, 32]
84
+ qkv_bias: True
85
+
86
+ cross_view_swap:
87
+ rel_pos_emb: False
88
+ q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
89
+ feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
90
+ bev_embedding_flag: [ true, false, false ]
91
+
92
+ self_attn:
93
+ dim_head: 32
94
+ dropout: 0.1
95
+ window_size: 32
96
+
97
+ sttf: &sttf
98
+ resolution: 0.390625 # m/pixel
99
+ downsample_rate: 8
100
+ use_roi_mask: true
101
+
102
+ fax_fusion:
103
+ input_dim: 128
104
+ mlp_dim: 256
105
+ agent_size: *max_cav
106
+ window_size: 8
107
+ dim_head: 32
108
+ drop_out: 0.1
109
+ depth: 3
110
+ mask: true
111
+
112
+
113
+ seg_head_dim: 32
114
+ output_class: 2
115
+
116
+ loss:
117
+ core_method: vanilla_seg_loss
118
+ args:
119
+ target: *target
120
+ d_weights: 75.0
121
+ s_weights: 15.0
122
+ d_coe: 2.0
123
+ s_coe: 0.0
124
+
125
+ optimizer:
126
+ core_method: AdamW
127
+ lr: 2e-4
128
+ args:
129
+ eps: 1e-10
130
+ weight_decay: 1e-2
131
+
132
+ lr_scheduler:
133
+ core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
134
+ epoches: *epoches
135
+ warmup_lr: 2e-5
136
+ warmup_epoches: 10
137
+ lr_min: 5e-6
compression_exp/compression_64/net_epoch71.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b69e637c73e6c86000e2e9ca91edda48172da46a80524edd2d2510cde19941
3
+ size 114594055
compression_exp/compression_8/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fax # only used for demonstration data api
2
+ root_dir: '/data/s2/semantic-opv2v/train'
3
+ validate_dir: '/data/s2/semantic-opv2v/test'
4
+
5
+
6
+ train_params:
7
+ batch_size: &batch_size 1
8
+ epoches: &epoches 71
9
+ eval_freq: 5
10
+ save_freq: 5
11
+ max_cav: &max_cav 5
12
+ visible: true
13
+
14
+
15
+ fusion:
16
+ core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
17
+ args: []
18
+
19
+
20
+ data_augment: []
21
+ add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
22
+
23
+ # preprocess-related
24
+ preprocess:
25
+ # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
26
+ core_method: 'RgbPreprocessor'
27
+ args:
28
+ bgr2rgb: true
29
+ resize_x: &image_width 512
30
+ resize_y: &image_height 512
31
+ mean: [0.485, 0.456, 0.406]
32
+ std: [0.229, 0.224, 0.225]
33
+ # object evaluation range
34
+ cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
35
+
36
+
37
+ # anchor box related
38
+ postprocess:
39
+ core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
40
+ anchor_args:
41
+ cav_lidar_range: *cav_lidar
42
+ order: 'hwl' # hwl or lwh
43
+ max_num: 100 # maximum number of objects in a single frame; use this number to make sure different frames have the same dimension in the same batch
44
+ nms_thresh: 0.15
45
+
46
+ model:
47
+ core_method: corpbevt
48
+ args:
49
+ target: &target 'dynamic' # one of: dynamic, static or both
50
+ max_cav: *max_cav
51
+ encoder:
52
+ num_layers: 34
53
+ pretrained: true
54
+ image_width: *image_width
55
+ image_height: *image_height
56
+ id_pick: [1, 2, 3]
57
+
58
+ compression: 8 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
59
+
60
+ decoder:
61
+ input_dim: 128
62
+ num_layer: 3
63
+ num_ch_dec: &decoder_block [32, 64, 128]
64
+
65
+ fax:
66
+ dim: [128, 128, 128] # b, d, h, w from resnet -> b 256 h w
67
+ middle: [2, 2, 2] # middle conv
68
+ bev_embedding:
69
+ sigma: 1.0
70
+ bev_height: 256
71
+ bev_width: 256
72
+ h_meters: 100
73
+ w_meters: 100
74
+ offset: 0.0
75
+ upsample_scales: [2, 4, 8]
76
+
77
+ cross_view: #cross_view attention
78
+ image_height: *image_height
79
+ image_width: *image_width
80
+ no_image_features: False
81
+ skip: True
82
+ heads: [4, 4, 4]
83
+ dim_head: [32, 32, 32]
84
+ qkv_bias: True
85
+
86
+ cross_view_swap:
87
+ rel_pos_emb: False
88
+ q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
89
+ feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
90
+ bev_embedding_flag: [ true, false, false ]
91
+
92
+ self_attn:
93
+ dim_head: 32
94
+ dropout: 0.1
95
+ window_size: 32
96
+
97
+ sttf: &sttf
98
+ resolution: 0.390625 # m/pixel
99
+ downsample_rate: 8
100
+ use_roi_mask: true
101
+
102
+ fax_fusion:
103
+ input_dim: 128
104
+ mlp_dim: 256
105
+ agent_size: *max_cav
106
+ window_size: 8
107
+ dim_head: 32
108
+ drop_out: 0.1
109
+ depth: 3
110
+ mask: true
111
+
112
+
113
+ seg_head_dim: 32
114
+ output_class: 2
115
+
116
+ loss:
117
+ core_method: vanilla_seg_loss
118
+ args:
119
+ target: *target
120
+ d_weights: 75.0
121
+ s_weights: 15.0
122
+ d_coe: 2.0
123
+ s_coe: 0.0
124
+
125
+ optimizer:
126
+ core_method: AdamW
127
+ lr: 2e-4
128
+ args:
129
+ eps: 1e-10
130
+ weight_decay: 1e-2
131
+
132
+ lr_scheduler:
133
+ core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
134
+ epoches: *epoches
135
+ warmup_lr: 2e-5
136
+ warmup_epoches: 10
137
+ lr_min: 5e-6
compression_exp/compression_8/net_epoch71.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f076cba4beefd596c69a4f781bf5719225561083fe0329949b89ae98b93cbcc
3
+ size 114608711