Theia-4869 committed on
Commit
7d98720
·
verified ·
1 Parent(s): ea48ad2

Upload none_mask_box_point_512/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. none_mask_box_point_512/config.yaml +306 -0
none_mask_box_point_512/config.yaml ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scratch:
2
+ resolution: 512
3
+ batch_size: 8
4
+ num_workers: 10
5
+ num_frames: 8
6
+ max_num_objects: 1
7
+ base_lr: 5.0e-05
8
+ vision_lr: 3.0e-05
9
+ phases_per_epoch: 1
10
+ num_epochs: 40
11
+ dataset:
12
+ data_folder: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor/internal/train
13
+ metadata_file: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor.jsonl
14
+ multiplier: 1
15
+ vos:
16
+ transforms:
17
+ - _target_: training.dataset.transforms.ComposeAPI
18
+ transforms:
19
+ - _target_: training.dataset.transforms.RandomResizeAPI
20
+ sizes: ${scratch.resolution}
21
+ square: true
22
+ consistent_transform: true
23
+ - _target_: training.dataset.transforms.ToTensorAPI
24
+ - _target_: training.dataset.transforms.NormalizeAPI
25
+ mean:
26
+ - 0.485
27
+ - 0.456
28
+ - 0.406
29
+ std:
30
+ - 0.229
31
+ - 0.224
32
+ - 0.225
33
+ trainer:
34
+ _target_: training.trainer.Trainer
35
+ mode: train_only
36
+ max_epochs: ${times:${scratch.num_epochs},${scratch.phases_per_epoch}}
37
+ accelerator: cuda
38
+ seed_value: 123
39
+ data:
40
+ train:
41
+ _target_: training.dataset.sam2_datasets.TorchTrainMixedDataset
42
+ phases_per_epoch: ${scratch.phases_per_epoch}
43
+ batch_sizes:
44
+ - ${scratch.batch_size}
45
+ datasets:
46
+ - _target_: training.dataset.utils.RepeatFactorWrapper
47
+ dataset:
48
+ _target_: training.dataset.utils.ConcatDataset
49
+ datasets:
50
+ - _target_: training.dataset.vos_dataset.VOSDataset
51
+ transforms: ${vos.transforms}
52
+ training: true
53
+ video_dataset:
54
+ _target_: training.dataset.vos_raw_dataset.NPZRawDataset
55
+ data_folder: ${dataset.data_folder}
56
+ metadata_file: ${dataset.metadata_file}
57
+ dimension: 3
58
+ num_frames: ${scratch.num_frames}
59
+ sampler:
60
+ _target_: training.dataset.vos_sampler.RandomUniformSampler
61
+ num_frames: ${scratch.num_frames}
62
+ max_num_objects: ${scratch.max_num_objects}
63
+ multiplier: ${dataset.multiplier}
64
+ shuffle: true
65
+ num_workers: ${scratch.num_workers}
66
+ pin_memory: true
67
+ drop_last: true
68
+ collate_fn:
69
+ _target_: training.utils.data_utils.collate_fn
70
+ _partial_: true
71
+ dict_key: all
72
+ model:
73
+ _target_: training.model.sam2.SAM2Train
74
+ image_encoder:
75
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
76
+ scalp: 1
77
+ trunk:
78
+ _target_: sam2.modeling.backbones.hieradet.Hiera
79
+ embed_dim: 96
80
+ num_heads: 1
81
+ stages:
82
+ - 1
83
+ - 2
84
+ - 7
85
+ - 2
86
+ global_att_blocks:
87
+ - 5
88
+ - 7
89
+ - 9
90
+ window_pos_embed_bkg_spatial_size:
91
+ - 7
92
+ - 7
93
+ neck:
94
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
95
+ position_encoding:
96
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
97
+ num_pos_feats: 256
98
+ normalize: true
99
+ scale: null
100
+ temperature: 10000
101
+ d_model: 256
102
+ backbone_channel_list:
103
+ - 768
104
+ - 384
105
+ - 192
106
+ - 96
107
+ fpn_top_down_levels:
108
+ - 2
109
+ - 3
110
+ fpn_interp_model: nearest
111
+ memory_attention:
112
+ _target_: sam2.modeling.memory_attention.MemoryAttention
113
+ d_model: 256
114
+ pos_enc_at_input: true
115
+ layer:
116
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
117
+ activation: relu
118
+ dim_feedforward: 2048
119
+ dropout: 0.1
120
+ pos_enc_at_attn: false
121
+ self_attention:
122
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
123
+ rope_theta: 10000.0
124
+ feat_sizes:
125
+ - 32
126
+ - 32
127
+ embedding_dim: 256
128
+ num_heads: 1
129
+ downsample_rate: 1
130
+ dropout: 0.1
131
+ d_model: 256
132
+ pos_enc_at_cross_attn_keys: true
133
+ pos_enc_at_cross_attn_queries: false
134
+ cross_attention:
135
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
136
+ rope_theta: 10000.0
137
+ feat_sizes:
138
+ - 32
139
+ - 32
140
+ rope_k_repeat: true
141
+ embedding_dim: 256
142
+ num_heads: 1
143
+ downsample_rate: 1
144
+ dropout: 0.1
145
+ kv_in_dim: 64
146
+ num_layers: 4
147
+ memory_encoder:
148
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
149
+ out_dim: 64
150
+ position_encoding:
151
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
152
+ num_pos_feats: 64
153
+ normalize: true
154
+ scale: null
155
+ temperature: 10000
156
+ mask_downsampler:
157
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
158
+ kernel_size: 3
159
+ stride: 2
160
+ padding: 1
161
+ fuser:
162
+ _target_: sam2.modeling.memory_encoder.Fuser
163
+ layer:
164
+ _target_: sam2.modeling.memory_encoder.CXBlock
165
+ dim: 256
166
+ kernel_size: 7
167
+ padding: 3
168
+ layer_scale_init_value: 1.0e-06
169
+ use_dwconv: true
170
+ num_layers: 2
171
+ num_maskmem: 7
172
+ image_size: ${scratch.resolution}
173
+ num_semantic_tokens: 0
174
+ semantic_prompt_level: -1
175
+ sigmoid_scale_for_mem_enc: 20.0
176
+ sigmoid_bias_for_mem_enc: -10.0
177
+ use_mask_input_as_output_without_sam: true
178
+ directly_add_no_mem_embed: true
179
+ no_obj_embed_spatial: true
180
+ use_high_res_features_in_sam: true
181
+ multimask_output_in_sam: true
182
+ iou_prediction_use_sigmoid: true
183
+ use_obj_ptrs_in_encoder: true
184
+ add_tpos_enc_to_obj_ptrs: true
185
+ proj_tpos_enc_in_obj_ptrs: true
186
+ use_signed_tpos_enc_to_obj_ptrs: true
187
+ only_obj_ptrs_in_the_past_for_eval: true
188
+ pred_obj_scores: true
189
+ pred_obj_scores_mlp: true
190
+ fixed_no_obj_ptr: true
191
+ multimask_output_for_tracking: true
192
+ use_multimask_token_for_obj_ptr: true
193
+ multimask_min_pt_num: 0
194
+ multimask_max_pt_num: 1
195
+ use_mlp_for_obj_ptr_proj: true
196
+ prob_to_use_pt_input_for_train: 0.5
197
+ prob_to_use_pt_input_for_eval: 0.0
198
+ prob_to_use_box_input_for_train: 0.5
199
+ prob_to_use_box_input_for_eval: 1.0
200
+ prob_to_use_mask_input_for_train: 0.5
201
+ prob_to_use_mask_input_for_eval: 0.0
202
+ prob_to_sample_from_gt_for_train: 0.1
203
+ num_frames_to_correct_for_train: 2
204
+ num_frames_to_correct_for_eval: 1
205
+ rand_frames_to_correct_for_train: true
206
+ add_all_frames_to_correct_as_cond: true
207
+ num_init_cond_frames_for_train: 2
208
+ rand_init_cond_frames_for_train: true
209
+ num_correction_pt_per_frame: 7
210
+ use_act_ckpt_iterative_pt_sampling: false
211
+ num_init_cond_frames_for_eval: 1
212
+ forward_backbone_per_frame_for_eval: false
213
+ logging:
214
+ tensorboard_writer:
215
+ _target_: training.utils.logger.make_tensorboard_logger
216
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
217
+ flush_secs: 120
218
+ should_log: true
219
+ log_dir: ${launcher.experiment_log_dir}/logs
220
+ log_freq: 10
221
+ checkpoint:
222
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
223
+ save_freq: 5
224
+ model_weight_initializer:
225
+ _partial_: true
226
+ _target_: training.utils.checkpoint_utils.load_state_dict_into_model
227
+ strict: true
228
+ ignore_unexpected_keys: null
229
+ ignore_missing_keys: null
230
+ state_dict:
231
+ _target_: training.utils.checkpoint_utils.load_checkpoint_and_apply_kernels
232
+ checkpoint_path: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/model/SAM2.1/sam2.1_hiera_tiny.pt
233
+ ckpt_state_dict_keys:
234
+ - model
235
+ distributed:
236
+ backend: nccl
237
+ find_unused_parameters: true
238
+ optim:
239
+ amp:
240
+ enabled: true
241
+ amp_dtype: bfloat16
242
+ optimizer:
243
+ _target_: torch.optim.AdamW
244
+ gradient_clip:
245
+ _target_: training.optimizer.GradientClipper
246
+ max_norm: 0.1
247
+ norm_type: 2
248
+ param_group_modifiers:
249
+ - _target_: training.optimizer.layer_decay_param_modifier
250
+ _partial_: true
251
+ layer_decay_value: 0.9
252
+ apply_to: image_encoder.trunk
253
+ overrides:
254
+ - pattern: '*pos_embed*'
255
+ value: 1.0
256
+ options:
257
+ lr:
258
+ - scheduler:
259
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
260
+ start_value: ${scratch.base_lr}
261
+ end_value: ${divide:${scratch.base_lr},10}
262
+ - scheduler:
263
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
264
+ start_value: ${scratch.vision_lr}
265
+ end_value: ${divide:${scratch.vision_lr},10}
266
+ param_names:
267
+ - image_encoder.*
268
+ weight_decay:
269
+ - scheduler:
270
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
271
+ value: 0.1
272
+ - scheduler:
273
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
274
+ value: 0.0
275
+ param_names:
276
+ - '*bias*'
277
+ module_cls_names:
278
+ - torch.nn.LayerNorm
279
+ loss:
280
+ all:
281
+ _target_: training.loss_fns.MultiStepMultiMasksAndIous
282
+ weight_dict:
283
+ loss_mask: 20
284
+ loss_dice: 1
285
+ loss_iou: 1
286
+ loss_class: 1
287
+ supervise_all_iou: true
288
+ iou_use_l1_loss: true
289
+ pred_obj_scores: true
290
+ focal_alpha_obj_score: -1.0
291
+ focal_gamma_obj_score: 0
292
+ launcher:
293
+ num_nodes: 1
294
+ gpus_per_node: 8
295
+ experiment_log_dir: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/code/Med/TumorSeg/logs/train/sam2.1/hiera_t/none_mask_box_point_512
296
+ submitit:
297
+ partition: null
298
+ account: null
299
+ qos: null
300
+ cpus_per_task: 10
301
+ use_cluster: false
302
+ timeout_hour: 24
303
+ name: null
304
+ port_range:
305
+ - 10000
306
+ - 65000