Upload 10 files
Browse files- compression_exp/compression_0/config.yaml +137 -0
- compression_exp/compression_0/net_epoch151.pth +3 -0
- compression_exp/compression_32/config.yaml +137 -0
- compression_exp/compression_32/net_epoch71.pth +3 -0
- compression_exp/compression_4/config.yaml +137 -0
- compression_exp/compression_4/net_epoch41.pth +3 -0
- compression_exp/compression_64/config.yaml +137 -0
- compression_exp/compression_64/net_epoch71.pth +3 -0
- compression_exp/compression_8/config.yaml +137 -0
- compression_exp/compression_8/net_epoch71.pth +3 -0
compression_exp/compression_0/config.yaml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: fax # only used for demonstration data api
|
| 2 |
+
root_dir: '/data/s2/semantic-opv2v/train'
|
| 3 |
+
validate_dir: '/data/s2/semantic-opv2v/test'
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
train_params:
|
| 7 |
+
batch_size: &batch_size 1
|
| 8 |
+
epoches: &epoches 151
|
| 9 |
+
eval_freq: 5
|
| 10 |
+
save_freq: 5
|
| 11 |
+
max_cav: &max_cav 5
|
| 12 |
+
visible: true
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
fusion:
|
| 16 |
+
core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
|
| 17 |
+
args: []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
data_augment: []
|
| 21 |
+
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
|
| 22 |
+
|
| 23 |
+
# preprocess-related
|
| 24 |
+
preprocess:
|
| 25 |
+
# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
|
| 26 |
+
core_method: 'RgbPreprocessor'
|
| 27 |
+
args:
|
| 28 |
+
bgr2rgb: true
|
| 29 |
+
resize_x: &image_width 512
|
| 30 |
+
resize_y: &image_height 512
|
| 31 |
+
mean: [0.485, 0.456, 0.406]
|
| 32 |
+
std: [0.229, 0.224, 0.225]
|
| 33 |
+
# object evaluation range
|
| 34 |
+
cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# anchor box related
|
| 38 |
+
postprocess:
|
| 39 |
+
core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
|
| 40 |
+
anchor_args:
|
| 41 |
+
cav_lidar_range: *cav_lidar
|
| 42 |
+
order: 'hwl' # hwl or lwh
|
| 43 |
+
max_num: 100 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
|
| 44 |
+
nms_thresh: 0.15
|
| 45 |
+
|
| 46 |
+
model:
|
| 47 |
+
core_method: corpbevt
|
| 48 |
+
args:
|
| 49 |
+
target: &target 'dynamic' #'dynamic' dynamic, static or both
|
| 50 |
+
max_cav: *max_cav
|
| 51 |
+
encoder:
|
| 52 |
+
num_layers: 34
|
| 53 |
+
pretrained: true
|
| 54 |
+
image_width: *image_width
|
| 55 |
+
image_height: *image_height
|
| 56 |
+
id_pick: [1, 2, 3]
|
| 57 |
+
|
| 58 |
+
compression: 0 #64 #0 #8 #64 #0 # compression rate
|
| 59 |
+
|
| 60 |
+
decoder:
|
| 61 |
+
input_dim: 128
|
| 62 |
+
num_layer: 3
|
| 63 |
+
num_ch_dec: &decoder_block [32, 64, 128]
|
| 64 |
+
|
| 65 |
+
fax:
|
| 66 |
+
dim: [128, 128, 128] # b, d, h w from resnet -> b 256 h w
|
| 67 |
+
middle: [2, 2, 2] # middle conv
|
| 68 |
+
bev_embedding:
|
| 69 |
+
sigma: 1.0
|
| 70 |
+
bev_height: 256
|
| 71 |
+
bev_width: 256
|
| 72 |
+
h_meters: 100
|
| 73 |
+
w_meters: 100
|
| 74 |
+
offset: 0.0
|
| 75 |
+
upsample_scales: [2, 4, 8]
|
| 76 |
+
|
| 77 |
+
cross_view: #cross_view attention
|
| 78 |
+
image_height: *image_height
|
| 79 |
+
image_width: *image_width
|
| 80 |
+
no_image_features: False
|
| 81 |
+
skip: True
|
| 82 |
+
heads: [4, 4, 4]
|
| 83 |
+
dim_head: [32, 32, 32]
|
| 84 |
+
qkv_bias: True
|
| 85 |
+
|
| 86 |
+
cross_view_swap:
|
| 87 |
+
rel_pos_emb: False
|
| 88 |
+
q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
|
| 89 |
+
feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
|
| 90 |
+
bev_embedding_flag: [ true, false, false ]
|
| 91 |
+
|
| 92 |
+
self_attn:
|
| 93 |
+
dim_head: 32
|
| 94 |
+
dropout: 0.1
|
| 95 |
+
window_size: 32
|
| 96 |
+
|
| 97 |
+
sttf: &sttf
|
| 98 |
+
resolution: 0.390625 # m/pixel
|
| 99 |
+
downsample_rate: 8
|
| 100 |
+
use_roi_mask: true
|
| 101 |
+
|
| 102 |
+
fax_fusion:
|
| 103 |
+
input_dim: 128
|
| 104 |
+
mlp_dim: 256
|
| 105 |
+
agent_size: *max_cav
|
| 106 |
+
window_size: 8
|
| 107 |
+
dim_head: 32
|
| 108 |
+
drop_out: 0.1
|
| 109 |
+
depth: 3
|
| 110 |
+
mask: true
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
seg_head_dim: 32
|
| 114 |
+
output_class: 2
|
| 115 |
+
|
| 116 |
+
loss:
|
| 117 |
+
core_method: vanilla_seg_loss
|
| 118 |
+
args:
|
| 119 |
+
target: *target
|
| 120 |
+
d_weights: 75.0
|
| 121 |
+
s_weights: 15.0
|
| 122 |
+
d_coe: 2.0
|
| 123 |
+
s_coe: 0.0
|
| 124 |
+
|
| 125 |
+
optimizer:
|
| 126 |
+
core_method: AdamW
|
| 127 |
+
lr: 2e-4
|
| 128 |
+
args:
|
| 129 |
+
eps: 1e-10
|
| 130 |
+
weight_decay: 1e-2
|
| 131 |
+
|
| 132 |
+
lr_scheduler:
|
| 133 |
+
core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
|
| 134 |
+
epoches: *epoches
|
| 135 |
+
warmup_lr: 2e-5
|
| 136 |
+
warmup_epoches: 10
|
| 137 |
+
lr_min: 5e-6
|
compression_exp/compression_0/net_epoch151.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5af040f45bdf102e4a7776cfb684073bbf9aea74ba6b2abd24b0e9fdbcf7c0fa
|
| 3 |
+
size 113990249
|
compression_exp/compression_32/config.yaml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: fax # only used for demonstration data api
|
| 2 |
+
root_dir: '/data/s2/semantic-opv2v/train'
|
| 3 |
+
validate_dir: '/data/s2/semantic-opv2v/test'
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
train_params:
|
| 7 |
+
batch_size: &batch_size 1
|
| 8 |
+
epoches: &epoches 71
|
| 9 |
+
eval_freq: 5
|
| 10 |
+
save_freq: 5
|
| 11 |
+
max_cav: &max_cav 5
|
| 12 |
+
visible: true
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
fusion:
|
| 16 |
+
core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
|
| 17 |
+
args: []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
data_augment: []
|
| 21 |
+
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
|
| 22 |
+
|
| 23 |
+
# preprocess-related
|
| 24 |
+
preprocess:
|
| 25 |
+
# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
|
| 26 |
+
core_method: 'RgbPreprocessor'
|
| 27 |
+
args:
|
| 28 |
+
bgr2rgb: true
|
| 29 |
+
resize_x: &image_width 512
|
| 30 |
+
resize_y: &image_height 512
|
| 31 |
+
mean: [0.485, 0.456, 0.406]
|
| 32 |
+
std: [0.229, 0.224, 0.225]
|
| 33 |
+
# object evaluation range
|
| 34 |
+
cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# anchor box related
|
| 38 |
+
postprocess:
|
| 39 |
+
core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
|
| 40 |
+
anchor_args:
|
| 41 |
+
cav_lidar_range: *cav_lidar
|
| 42 |
+
order: 'hwl' # hwl or lwh
|
| 43 |
+
max_num: 100 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
|
| 44 |
+
nms_thresh: 0.15
|
| 45 |
+
|
| 46 |
+
model:
|
| 47 |
+
core_method: corpbevt
|
| 48 |
+
args:
|
| 49 |
+
target: &target 'dynamic' #'dynamic' dynamic, static or both
|
| 50 |
+
max_cav: *max_cav
|
| 51 |
+
encoder:
|
| 52 |
+
num_layers: 34
|
| 53 |
+
pretrained: true
|
| 54 |
+
image_width: *image_width
|
| 55 |
+
image_height: *image_height
|
| 56 |
+
id_pick: [1, 2, 3]
|
| 57 |
+
|
| 58 |
+
compression: 32 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
|
| 59 |
+
|
| 60 |
+
decoder:
|
| 61 |
+
input_dim: 128
|
| 62 |
+
num_layer: 3
|
| 63 |
+
num_ch_dec: &decoder_block [32, 64, 128]
|
| 64 |
+
|
| 65 |
+
fax:
|
| 66 |
+
dim: [128, 128, 128] # b, d, h w from resnet -> b 256 h w
|
| 67 |
+
middle: [2, 2, 2] # middle conv
|
| 68 |
+
bev_embedding:
|
| 69 |
+
sigma: 1.0
|
| 70 |
+
bev_height: 256
|
| 71 |
+
bev_width: 256
|
| 72 |
+
h_meters: 100
|
| 73 |
+
w_meters: 100
|
| 74 |
+
offset: 0.0
|
| 75 |
+
upsample_scales: [2, 4, 8]
|
| 76 |
+
|
| 77 |
+
cross_view: #cross_view attention
|
| 78 |
+
image_height: *image_height
|
| 79 |
+
image_width: *image_width
|
| 80 |
+
no_image_features: False
|
| 81 |
+
skip: True
|
| 82 |
+
heads: [4, 4, 4]
|
| 83 |
+
dim_head: [32, 32, 32]
|
| 84 |
+
qkv_bias: True
|
| 85 |
+
|
| 86 |
+
cross_view_swap:
|
| 87 |
+
rel_pos_emb: False
|
| 88 |
+
q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
|
| 89 |
+
feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
|
| 90 |
+
bev_embedding_flag: [ true, false, false ]
|
| 91 |
+
|
| 92 |
+
self_attn:
|
| 93 |
+
dim_head: 32
|
| 94 |
+
dropout: 0.1
|
| 95 |
+
window_size: 32
|
| 96 |
+
|
| 97 |
+
sttf: &sttf
|
| 98 |
+
resolution: 0.390625 # m/pixel
|
| 99 |
+
downsample_rate: 8
|
| 100 |
+
use_roi_mask: true
|
| 101 |
+
|
| 102 |
+
fax_fusion:
|
| 103 |
+
input_dim: 128
|
| 104 |
+
mlp_dim: 256
|
| 105 |
+
agent_size: *max_cav
|
| 106 |
+
window_size: 8
|
| 107 |
+
dim_head: 32
|
| 108 |
+
drop_out: 0.1
|
| 109 |
+
depth: 3
|
| 110 |
+
mask: true
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
seg_head_dim: 32
|
| 114 |
+
output_class: 2
|
| 115 |
+
|
| 116 |
+
loss:
|
| 117 |
+
core_method: vanilla_seg_loss
|
| 118 |
+
args:
|
| 119 |
+
target: *target
|
| 120 |
+
d_weights: 75.0
|
| 121 |
+
s_weights: 15.0
|
| 122 |
+
d_coe: 2.0
|
| 123 |
+
s_coe: 0.0
|
| 124 |
+
|
| 125 |
+
optimizer:
|
| 126 |
+
core_method: AdamW
|
| 127 |
+
lr: 2e-4
|
| 128 |
+
args:
|
| 129 |
+
eps: 1e-10
|
| 130 |
+
weight_decay: 1e-2
|
| 131 |
+
|
| 132 |
+
lr_scheduler:
|
| 133 |
+
core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
|
| 134 |
+
epoches: *epoches
|
| 135 |
+
warmup_lr: 2e-5
|
| 136 |
+
warmup_epoches: 10
|
| 137 |
+
lr_min: 5e-6
|
compression_exp/compression_32/net_epoch71.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80dc7a5c5bce3bc40d8d1929111c5caadcf93318a9774246f90bea7bcd103f60
|
| 3 |
+
size 114596103
|
compression_exp/compression_4/config.yaml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: fax # only used for demonstration data api
|
| 2 |
+
root_dir: '/data/s2/semantic-opv2v/train'
|
| 3 |
+
validate_dir: '/data/s2/semantic-opv2v/test'
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
train_params:
|
| 7 |
+
batch_size: &batch_size 1
|
| 8 |
+
epoches: &epoches 71
|
| 9 |
+
eval_freq: 5
|
| 10 |
+
save_freq: 5
|
| 11 |
+
max_cav: &max_cav 5
|
| 12 |
+
visible: true
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
fusion:
|
| 16 |
+
core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
|
| 17 |
+
args: []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
data_augment: []
|
| 21 |
+
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
|
| 22 |
+
|
| 23 |
+
# preprocess-related
|
| 24 |
+
preprocess:
|
| 25 |
+
# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
|
| 26 |
+
core_method: 'RgbPreprocessor'
|
| 27 |
+
args:
|
| 28 |
+
bgr2rgb: true
|
| 29 |
+
resize_x: &image_width 512
|
| 30 |
+
resize_y: &image_height 512
|
| 31 |
+
mean: [0.485, 0.456, 0.406]
|
| 32 |
+
std: [0.229, 0.224, 0.225]
|
| 33 |
+
# object evaluation range
|
| 34 |
+
cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# anchor box related
|
| 38 |
+
postprocess:
|
| 39 |
+
core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
|
| 40 |
+
anchor_args:
|
| 41 |
+
cav_lidar_range: *cav_lidar
|
| 42 |
+
order: 'hwl' # hwl or lwh
|
| 43 |
+
max_num: 100 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
|
| 44 |
+
nms_thresh: 0.15
|
| 45 |
+
|
| 46 |
+
model:
|
| 47 |
+
core_method: corpbevt
|
| 48 |
+
args:
|
| 49 |
+
target: &target 'dynamic' #'dynamic' dynamic, static or both
|
| 50 |
+
max_cav: *max_cav
|
| 51 |
+
encoder:
|
| 52 |
+
num_layers: 34
|
| 53 |
+
pretrained: true
|
| 54 |
+
image_width: *image_width
|
| 55 |
+
image_height: *image_height
|
| 56 |
+
id_pick: [1, 2, 3]
|
| 57 |
+
|
| 58 |
+
compression: 4 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
|
| 59 |
+
|
| 60 |
+
decoder:
|
| 61 |
+
input_dim: 128
|
| 62 |
+
num_layer: 3
|
| 63 |
+
num_ch_dec: &decoder_block [32, 64, 128]
|
| 64 |
+
|
| 65 |
+
fax:
|
| 66 |
+
dim: [128, 128, 128] # b, d, h w from resnet -> b 256 h w
|
| 67 |
+
middle: [2, 2, 2] # middle conv
|
| 68 |
+
bev_embedding:
|
| 69 |
+
sigma: 1.0
|
| 70 |
+
bev_height: 256
|
| 71 |
+
bev_width: 256
|
| 72 |
+
h_meters: 100
|
| 73 |
+
w_meters: 100
|
| 74 |
+
offset: 0.0
|
| 75 |
+
upsample_scales: [2, 4, 8]
|
| 76 |
+
|
| 77 |
+
cross_view: #cross_view attention
|
| 78 |
+
image_height: *image_height
|
| 79 |
+
image_width: *image_width
|
| 80 |
+
no_image_features: False
|
| 81 |
+
skip: True
|
| 82 |
+
heads: [4, 4, 4]
|
| 83 |
+
dim_head: [32, 32, 32]
|
| 84 |
+
qkv_bias: True
|
| 85 |
+
|
| 86 |
+
cross_view_swap:
|
| 87 |
+
rel_pos_emb: False
|
| 88 |
+
q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
|
| 89 |
+
feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
|
| 90 |
+
bev_embedding_flag: [ true, false, false ]
|
| 91 |
+
|
| 92 |
+
self_attn:
|
| 93 |
+
dim_head: 32
|
| 94 |
+
dropout: 0.1
|
| 95 |
+
window_size: 32
|
| 96 |
+
|
| 97 |
+
sttf: &sttf
|
| 98 |
+
resolution: 0.390625 # m/pixel
|
| 99 |
+
downsample_rate: 8
|
| 100 |
+
use_roi_mask: true
|
| 101 |
+
|
| 102 |
+
fax_fusion:
|
| 103 |
+
input_dim: 128
|
| 104 |
+
mlp_dim: 256
|
| 105 |
+
agent_size: *max_cav
|
| 106 |
+
window_size: 8
|
| 107 |
+
dim_head: 32
|
| 108 |
+
drop_out: 0.1
|
| 109 |
+
depth: 3
|
| 110 |
+
mask: true
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
seg_head_dim: 32
|
| 114 |
+
output_class: 2
|
| 115 |
+
|
| 116 |
+
loss:
|
| 117 |
+
core_method: vanilla_seg_loss
|
| 118 |
+
args:
|
| 119 |
+
target: *target
|
| 120 |
+
d_weights: 75.0
|
| 121 |
+
s_weights: 15.0
|
| 122 |
+
d_coe: 2.0
|
| 123 |
+
s_coe: 0.0
|
| 124 |
+
|
| 125 |
+
optimizer:
|
| 126 |
+
core_method: AdamW
|
| 127 |
+
lr: 2e-4
|
| 128 |
+
args:
|
| 129 |
+
eps: 1e-10
|
| 130 |
+
weight_decay: 1e-2
|
| 131 |
+
|
| 132 |
+
lr_scheduler:
|
| 133 |
+
core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
|
| 134 |
+
epoches: *epoches
|
| 135 |
+
warmup_lr: 2e-5
|
| 136 |
+
warmup_epoches: 10
|
| 137 |
+
lr_min: 5e-6
|
compression_exp/compression_4/net_epoch41.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:744ab1d8267a02c87c0cf687ca522ae4686ba0729a8fda3f0773a554ed855278
|
| 3 |
+
size 114625415
|
compression_exp/compression_64/config.yaml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: fax # only used for demonstration data api
|
| 2 |
+
root_dir: '/data/s2/semantic-opv2v/train'
|
| 3 |
+
validate_dir: '/data/s2/semantic-opv2v/test'
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
train_params:
|
| 7 |
+
batch_size: &batch_size 1
|
| 8 |
+
epoches: &epoches 71
|
| 9 |
+
eval_freq: 5
|
| 10 |
+
save_freq: 5
|
| 11 |
+
max_cav: &max_cav 5
|
| 12 |
+
visible: true
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
fusion:
|
| 16 |
+
core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
|
| 17 |
+
args: []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
data_augment: []
|
| 21 |
+
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
|
| 22 |
+
|
| 23 |
+
# preprocess-related
|
| 24 |
+
preprocess:
|
| 25 |
+
# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
|
| 26 |
+
core_method: 'RgbPreprocessor'
|
| 27 |
+
args:
|
| 28 |
+
bgr2rgb: true
|
| 29 |
+
resize_x: &image_width 512
|
| 30 |
+
resize_y: &image_height 512
|
| 31 |
+
mean: [0.485, 0.456, 0.406]
|
| 32 |
+
std: [0.229, 0.224, 0.225]
|
| 33 |
+
# object evaluation range
|
| 34 |
+
cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# anchor box related
|
| 38 |
+
postprocess:
|
| 39 |
+
core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
|
| 40 |
+
anchor_args:
|
| 41 |
+
cav_lidar_range: *cav_lidar
|
| 42 |
+
order: 'hwl' # hwl or lwh
|
| 43 |
+
max_num: 100 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
|
| 44 |
+
nms_thresh: 0.15
|
| 45 |
+
|
| 46 |
+
model:
|
| 47 |
+
core_method: corpbevt
|
| 48 |
+
args:
|
| 49 |
+
target: &target 'dynamic' #'dynamic' dynamic, static or both
|
| 50 |
+
max_cav: *max_cav
|
| 51 |
+
encoder:
|
| 52 |
+
num_layers: 34
|
| 53 |
+
pretrained: true
|
| 54 |
+
image_width: *image_width
|
| 55 |
+
image_height: *image_height
|
| 56 |
+
id_pick: [1, 2, 3]
|
| 57 |
+
|
| 58 |
+
compression: 64 #1 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
|
| 59 |
+
|
| 60 |
+
decoder:
|
| 61 |
+
input_dim: 128
|
| 62 |
+
num_layer: 3
|
| 63 |
+
num_ch_dec: &decoder_block [32, 64, 128]
|
| 64 |
+
|
| 65 |
+
fax:
|
| 66 |
+
dim: [128, 128, 128] # b, d, h w from resnet -> b 256 h w
|
| 67 |
+
middle: [2, 2, 2] # middle conv
|
| 68 |
+
bev_embedding:
|
| 69 |
+
sigma: 1.0
|
| 70 |
+
bev_height: 256
|
| 71 |
+
bev_width: 256
|
| 72 |
+
h_meters: 100
|
| 73 |
+
w_meters: 100
|
| 74 |
+
offset: 0.0
|
| 75 |
+
upsample_scales: [2, 4, 8]
|
| 76 |
+
|
| 77 |
+
cross_view: #cross_view attention
|
| 78 |
+
image_height: *image_height
|
| 79 |
+
image_width: *image_width
|
| 80 |
+
no_image_features: False
|
| 81 |
+
skip: True
|
| 82 |
+
heads: [4, 4, 4]
|
| 83 |
+
dim_head: [32, 32, 32]
|
| 84 |
+
qkv_bias: True
|
| 85 |
+
|
| 86 |
+
cross_view_swap:
|
| 87 |
+
rel_pos_emb: False
|
| 88 |
+
q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
|
| 89 |
+
feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
|
| 90 |
+
bev_embedding_flag: [ true, false, false ]
|
| 91 |
+
|
| 92 |
+
self_attn:
|
| 93 |
+
dim_head: 32
|
| 94 |
+
dropout: 0.1
|
| 95 |
+
window_size: 32
|
| 96 |
+
|
| 97 |
+
sttf: &sttf
|
| 98 |
+
resolution: 0.390625 # m/pixel
|
| 99 |
+
downsample_rate: 8
|
| 100 |
+
use_roi_mask: true
|
| 101 |
+
|
| 102 |
+
fax_fusion:
|
| 103 |
+
input_dim: 128
|
| 104 |
+
mlp_dim: 256
|
| 105 |
+
agent_size: *max_cav
|
| 106 |
+
window_size: 8
|
| 107 |
+
dim_head: 32
|
| 108 |
+
drop_out: 0.1
|
| 109 |
+
depth: 3
|
| 110 |
+
mask: true
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
seg_head_dim: 32
|
| 114 |
+
output_class: 2
|
| 115 |
+
|
| 116 |
+
loss:
|
| 117 |
+
core_method: vanilla_seg_loss
|
| 118 |
+
args:
|
| 119 |
+
target: *target
|
| 120 |
+
d_weights: 75.0
|
| 121 |
+
s_weights: 15.0
|
| 122 |
+
d_coe: 2.0
|
| 123 |
+
s_coe: 0.0
|
| 124 |
+
|
| 125 |
+
optimizer:
|
| 126 |
+
core_method: AdamW
|
| 127 |
+
lr: 2e-4
|
| 128 |
+
args:
|
| 129 |
+
eps: 1e-10
|
| 130 |
+
weight_decay: 1e-2
|
| 131 |
+
|
| 132 |
+
lr_scheduler:
|
| 133 |
+
core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
|
| 134 |
+
epoches: *epoches
|
| 135 |
+
warmup_lr: 2e-5
|
| 136 |
+
warmup_epoches: 10
|
| 137 |
+
lr_min: 5e-6
|
compression_exp/compression_64/net_epoch71.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86b69e637c73e6c86000e2e9ca91edda48172da46a80524edd2d2510cde19941
|
| 3 |
+
size 114594055
|
compression_exp/compression_8/config.yaml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: fax # only used for demonstration data api
|
| 2 |
+
root_dir: '/data/s2/semantic-opv2v/train'
|
| 3 |
+
validate_dir: '/data/s2/semantic-opv2v/test'
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
train_params:
|
| 7 |
+
batch_size: &batch_size 1
|
| 8 |
+
epoches: &epoches 71
|
| 9 |
+
eval_freq: 5
|
| 10 |
+
save_freq: 5
|
| 11 |
+
max_cav: &max_cav 5
|
| 12 |
+
visible: true
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
fusion:
|
| 16 |
+
core_method: 'CamIntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported
|
| 17 |
+
args: []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
data_augment: []
|
| 21 |
+
add_data_extension: ['bev_dynamic.png', 'bev_static.png', 'bev_lane.png', 'bev_visibility.png', 'bev_visibility_corp.png']
|
| 22 |
+
|
| 23 |
+
# preprocess-related
|
| 24 |
+
preprocess:
|
| 25 |
+
# options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
|
| 26 |
+
core_method: 'RgbPreprocessor'
|
| 27 |
+
args:
|
| 28 |
+
bgr2rgb: true
|
| 29 |
+
resize_x: &image_width 512
|
| 30 |
+
resize_y: &image_height 512
|
| 31 |
+
mean: [0.485, 0.456, 0.406]
|
| 32 |
+
std: [0.229, 0.224, 0.225]
|
| 33 |
+
# object evaluation range
|
| 34 |
+
cav_lidar_range: &cav_lidar [-50, -50, -3, 50, 50, 1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# anchor box related
|
| 38 |
+
postprocess:
|
| 39 |
+
core_method: 'CameraBevPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
|
| 40 |
+
anchor_args:
|
| 41 |
+
cav_lidar_range: *cav_lidar
|
| 42 |
+
order: 'hwl' # hwl or lwh
|
| 43 |
+
max_num: 100 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
|
| 44 |
+
nms_thresh: 0.15
|
| 45 |
+
|
| 46 |
+
model:
|
| 47 |
+
core_method: corpbevt
|
| 48 |
+
args:
|
| 49 |
+
target: &target 'dynamic' #'dynamic' dynamic, static or both
|
| 50 |
+
max_cav: *max_cav
|
| 51 |
+
encoder:
|
| 52 |
+
num_layers: 34
|
| 53 |
+
pretrained: true
|
| 54 |
+
image_width: *image_width
|
| 55 |
+
image_height: *image_height
|
| 56 |
+
id_pick: [1, 2, 3]
|
| 57 |
+
|
| 58 |
+
compression: 8 #0.2 #2 #0 #64 #0 #8 #64 #0 # compression rate
|
| 59 |
+
|
| 60 |
+
decoder:
|
| 61 |
+
input_dim: 128
|
| 62 |
+
num_layer: 3
|
| 63 |
+
num_ch_dec: &decoder_block [32, 64, 128]
|
| 64 |
+
|
| 65 |
+
fax:
|
| 66 |
+
dim: [128, 128, 128] # b, d, h w from resnet -> b 256 h w
|
| 67 |
+
middle: [2, 2, 2] # middle conv
|
| 68 |
+
bev_embedding:
|
| 69 |
+
sigma: 1.0
|
| 70 |
+
bev_height: 256
|
| 71 |
+
bev_width: 256
|
| 72 |
+
h_meters: 100
|
| 73 |
+
w_meters: 100
|
| 74 |
+
offset: 0.0
|
| 75 |
+
upsample_scales: [2, 4, 8]
|
| 76 |
+
|
| 77 |
+
cross_view: #cross_view attention
|
| 78 |
+
image_height: *image_height
|
| 79 |
+
image_width: *image_width
|
| 80 |
+
no_image_features: False
|
| 81 |
+
skip: True
|
| 82 |
+
heads: [4, 4, 4]
|
| 83 |
+
dim_head: [32, 32, 32]
|
| 84 |
+
qkv_bias: True
|
| 85 |
+
|
| 86 |
+
cross_view_swap:
|
| 87 |
+
rel_pos_emb: False
|
| 88 |
+
q_win_size: [ [ 16, 16 ], [ 16, 16 ], [ 32, 32 ] ]
|
| 89 |
+
feat_win_size: [ [ 8, 8 ], [ 8, 8 ], [ 16, 16 ] ]
|
| 90 |
+
bev_embedding_flag: [ true, false, false ]
|
| 91 |
+
|
| 92 |
+
self_attn:
|
| 93 |
+
dim_head: 32
|
| 94 |
+
dropout: 0.1
|
| 95 |
+
window_size: 32
|
| 96 |
+
|
| 97 |
+
sttf: &sttf
|
| 98 |
+
resolution: 0.390625 # m/pixel
|
| 99 |
+
downsample_rate: 8
|
| 100 |
+
use_roi_mask: true
|
| 101 |
+
|
| 102 |
+
fax_fusion:
|
| 103 |
+
input_dim: 128
|
| 104 |
+
mlp_dim: 256
|
| 105 |
+
agent_size: *max_cav
|
| 106 |
+
window_size: 8
|
| 107 |
+
dim_head: 32
|
| 108 |
+
drop_out: 0.1
|
| 109 |
+
depth: 3
|
| 110 |
+
mask: true
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
seg_head_dim: 32
|
| 114 |
+
output_class: 2
|
| 115 |
+
|
| 116 |
+
loss:
|
| 117 |
+
core_method: vanilla_seg_loss
|
| 118 |
+
args:
|
| 119 |
+
target: *target
|
| 120 |
+
d_weights: 75.0
|
| 121 |
+
s_weights: 15.0
|
| 122 |
+
d_coe: 2.0
|
| 123 |
+
s_coe: 0.0
|
| 124 |
+
|
| 125 |
+
optimizer:
|
| 126 |
+
core_method: AdamW
|
| 127 |
+
lr: 2e-4
|
| 128 |
+
args:
|
| 129 |
+
eps: 1e-10
|
| 130 |
+
weight_decay: 1e-2
|
| 131 |
+
|
| 132 |
+
lr_scheduler:
|
| 133 |
+
core_method: cosineannealwarm # step, multistep, Exponential and cosineannealwarm are supported
|
| 134 |
+
epoches: *epoches
|
| 135 |
+
warmup_lr: 2e-5
|
| 136 |
+
warmup_epoches: 10
|
| 137 |
+
lr_min: 5e-6
|
compression_exp/compression_8/net_epoch71.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f076cba4beefd596c69a4f781bf5719225561083fe0329949b89ae98b93cbcc
|
| 3 |
+
size 114608711
|