Theia-4869 committed on
Commit
267be3d
·
verified ·
1 Parent(s): 724bc78

Upload organ_tumor_mask_box_point_512/config.yaml with huggingface_hub

Browse files
organ_tumor_mask_box_point_512/config.yaml ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scratch:
2
+ resolution: 512
3
+ batch_size: 8
4
+ num_workers: 10
5
+ num_frames: 8
6
+ max_num_objects: 1
7
+ base_lr: 5.0e-05
8
+ vision_lr: 3.0e-05
9
+ phases_per_epoch: 1
10
+ num_epochs: 40
11
+ dataset:
12
+ organ_data_folder: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/organ/internal/train
13
+ organ_metadata_file: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/organ.jsonl
14
+ tumor_data_folder: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor/internal/train
15
+ tumor_metadata_file: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor.jsonl
16
+ multiplier: 1
17
+ vos:
18
+ transforms:
19
+ - _target_: training.dataset.transforms.ComposeAPI
20
+ transforms:
21
+ - _target_: training.dataset.transforms.RandomResizeAPI
22
+ sizes: ${scratch.resolution}
23
+ square: true
24
+ consistent_transform: true
25
+ - _target_: training.dataset.transforms.ToTensorAPI
26
+ - _target_: training.dataset.transforms.NormalizeAPI
27
+ mean:
28
+ - 0.485
29
+ - 0.456
30
+ - 0.406
31
+ std:
32
+ - 0.229
33
+ - 0.224
34
+ - 0.225
35
+ trainer:
36
+ _target_: training.trainer.Trainer
37
+ mode: train_only
38
+ max_epochs: ${times:${scratch.num_epochs},${scratch.phases_per_epoch}}
39
+ accelerator: cuda
40
+ seed_value: 123
41
+ data:
42
+ train:
43
+ _target_: training.dataset.sam2_datasets.TorchTrainMixedDataset
44
+ phases_per_epoch: ${scratch.phases_per_epoch}
45
+ batch_sizes:
46
+ - ${scratch.batch_size}
47
+ datasets:
48
+ - _target_: training.dataset.utils.RepeatFactorWrapper
49
+ dataset:
50
+ _target_: training.dataset.utils.ConcatDataset
51
+ datasets:
52
+ - _target_: training.dataset.vos_dataset.VOSDataset
53
+ transforms: ${vos.transforms}
54
+ training: true
55
+ video_dataset:
56
+ _target_: training.dataset.vos_raw_dataset.NPZRawDataset
57
+ data_folder: ${dataset.organ_data_folder}
58
+ metadata_file: ${dataset.organ_metadata_file}
59
+ dimension: 3
60
+ num_frames: ${scratch.num_frames}
61
+ sampler:
62
+ _target_: training.dataset.vos_sampler.RandomUniformSampler
63
+ num_frames: ${scratch.num_frames}
64
+ max_num_objects: ${scratch.max_num_objects}
65
+ multiplier: ${dataset.multiplier}
66
+ - _target_: training.dataset.vos_dataset.VOSDataset
67
+ transforms: ${vos.transforms}
68
+ training: true
69
+ video_dataset:
70
+ _target_: training.dataset.vos_raw_dataset.NPZRawDataset
71
+ data_folder: ${dataset.tumor_data_folder}
72
+ metadata_file: ${dataset.tumor_metadata_file}
73
+ dimension: 3
74
+ num_frames: ${scratch.num_frames}
75
+ sampler:
76
+ _target_: training.dataset.vos_sampler.RandomUniformSampler
77
+ num_frames: ${scratch.num_frames}
78
+ max_num_objects: ${scratch.max_num_objects}
79
+ multiplier: ${dataset.multiplier}
80
+ shuffle: true
81
+ num_workers: ${scratch.num_workers}
82
+ pin_memory: true
83
+ drop_last: true
84
+ collate_fn:
85
+ _target_: training.utils.data_utils.collate_fn
86
+ _partial_: true
87
+ dict_key: all
88
+ model:
89
+ _target_: training.model.sam2.SAM2Train
90
+ image_encoder:
91
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
92
+ scalp: 1
93
+ trunk:
94
+ _target_: sam2.modeling.backbones.hieradet.Hiera
95
+ embed_dim: 96
96
+ num_heads: 1
97
+ stages:
98
+ - 1
99
+ - 2
100
+ - 7
101
+ - 2
102
+ global_att_blocks:
103
+ - 5
104
+ - 7
105
+ - 9
106
+ window_pos_embed_bkg_spatial_size:
107
+ - 7
108
+ - 7
109
+ neck:
110
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
111
+ position_encoding:
112
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
113
+ num_pos_feats: 256
114
+ normalize: true
115
+ scale: null
116
+ temperature: 10000
117
+ d_model: 256
118
+ backbone_channel_list:
119
+ - 768
120
+ - 384
121
+ - 192
122
+ - 96
123
+ fpn_top_down_levels:
124
+ - 2
125
+ - 3
126
+ fpn_interp_model: nearest
127
+ memory_attention:
128
+ _target_: sam2.modeling.memory_attention.MemoryAttention
129
+ d_model: 256
130
+ pos_enc_at_input: true
131
+ layer:
132
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
133
+ activation: relu
134
+ dim_feedforward: 2048
135
+ dropout: 0.1
136
+ pos_enc_at_attn: false
137
+ self_attention:
138
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
139
+ rope_theta: 10000.0
140
+ feat_sizes:
141
+ - 32
142
+ - 32
143
+ embedding_dim: 256
144
+ num_heads: 1
145
+ downsample_rate: 1
146
+ dropout: 0.1
147
+ d_model: 256
148
+ pos_enc_at_cross_attn_keys: true
149
+ pos_enc_at_cross_attn_queries: false
150
+ cross_attention:
151
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
152
+ rope_theta: 10000.0
153
+ feat_sizes:
154
+ - 32
155
+ - 32
156
+ rope_k_repeat: true
157
+ embedding_dim: 256
158
+ num_heads: 1
159
+ downsample_rate: 1
160
+ dropout: 0.1
161
+ kv_in_dim: 64
162
+ num_layers: 4
163
+ memory_encoder:
164
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
165
+ out_dim: 64
166
+ position_encoding:
167
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
168
+ num_pos_feats: 64
169
+ normalize: true
170
+ scale: null
171
+ temperature: 10000
172
+ mask_downsampler:
173
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
174
+ kernel_size: 3
175
+ stride: 2
176
+ padding: 1
177
+ fuser:
178
+ _target_: sam2.modeling.memory_encoder.Fuser
179
+ layer:
180
+ _target_: sam2.modeling.memory_encoder.CXBlock
181
+ dim: 256
182
+ kernel_size: 7
183
+ padding: 3
184
+ layer_scale_init_value: 1.0e-06
185
+ use_dwconv: true
186
+ num_layers: 2
187
+ num_maskmem: 7
188
+ image_size: ${scratch.resolution}
189
+ num_semantic_tokens: 14
190
+ semantic_prompt_level: 2
191
+ sigmoid_scale_for_mem_enc: 20.0
192
+ sigmoid_bias_for_mem_enc: -10.0
193
+ use_mask_input_as_output_without_sam: true
194
+ directly_add_no_mem_embed: true
195
+ no_obj_embed_spatial: true
196
+ use_high_res_features_in_sam: true
197
+ multimask_output_in_sam: true
198
+ iou_prediction_use_sigmoid: true
199
+ use_obj_ptrs_in_encoder: true
200
+ add_tpos_enc_to_obj_ptrs: true
201
+ proj_tpos_enc_in_obj_ptrs: true
202
+ use_signed_tpos_enc_to_obj_ptrs: true
203
+ only_obj_ptrs_in_the_past_for_eval: true
204
+ pred_obj_scores: true
205
+ pred_obj_scores_mlp: true
206
+ fixed_no_obj_ptr: true
207
+ multimask_output_for_tracking: true
208
+ use_multimask_token_for_obj_ptr: true
209
+ multimask_min_pt_num: 0
210
+ multimask_max_pt_num: 1
211
+ use_mlp_for_obj_ptr_proj: true
212
+ prob_to_use_pt_input_for_train: 0.5
213
+ prob_to_use_pt_input_for_eval: 0.0
214
+ prob_to_use_box_input_for_train: 0.5
215
+ prob_to_use_box_input_for_eval: 1.0
216
+ prob_to_use_mask_input_for_train: 0.5
217
+ prob_to_use_mask_input_for_eval: 0.0
218
+ prob_to_sample_from_gt_for_train: 0.1
219
+ num_frames_to_correct_for_train: 2
220
+ num_frames_to_correct_for_eval: 1
221
+ rand_frames_to_correct_for_train: true
222
+ add_all_frames_to_correct_as_cond: true
223
+ num_init_cond_frames_for_train: 2
224
+ rand_init_cond_frames_for_train: true
225
+ num_correction_pt_per_frame: 7
226
+ use_act_ckpt_iterative_pt_sampling: false
227
+ num_init_cond_frames_for_eval: 1
228
+ forward_backbone_per_frame_for_eval: false
229
+ logging:
230
+ tensorboard_writer:
231
+ _target_: training.utils.logger.make_tensorboard_logger
232
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
233
+ flush_secs: 120
234
+ should_log: true
235
+ log_dir: ${launcher.experiment_log_dir}/logs
236
+ log_freq: 10
237
+ checkpoint:
238
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
239
+ save_freq: 5
240
+ model_weight_initializer:
241
+ _partial_: true
242
+ _target_: training.utils.checkpoint_utils.load_state_dict_into_model
243
+ strict: true
244
+ ignore_unexpected_keys: null
245
+ ignore_missing_keys:
246
+ - sam_mask_decoder.semantic_tokens.weight
247
+ state_dict:
248
+ _target_: training.utils.checkpoint_utils.load_checkpoint_and_apply_kernels
249
+ checkpoint_path: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/model/SAM2.1/sam2.1_hiera_tiny.pt
250
+ ckpt_state_dict_keys:
251
+ - model
252
+ distributed:
253
+ backend: nccl
254
+ find_unused_parameters: true
255
+ optim:
256
+ amp:
257
+ enabled: true
258
+ amp_dtype: bfloat16
259
+ optimizer:
260
+ _target_: torch.optim.AdamW
261
+ gradient_clip:
262
+ _target_: training.optimizer.GradientClipper
263
+ max_norm: 0.1
264
+ norm_type: 2
265
+ param_group_modifiers:
266
+ - _target_: training.optimizer.layer_decay_param_modifier
267
+ _partial_: true
268
+ layer_decay_value: 0.9
269
+ apply_to: image_encoder.trunk
270
+ overrides:
271
+ - pattern: '*pos_embed*'
272
+ value: 1.0
273
+ options:
274
+ lr:
275
+ - scheduler:
276
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
277
+ start_value: ${scratch.base_lr}
278
+ end_value: ${divide:${scratch.base_lr},10}
279
+ - scheduler:
280
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
281
+ start_value: ${scratch.vision_lr}
282
+ end_value: ${divide:${scratch.vision_lr},10}
283
+ param_names:
284
+ - image_encoder.*
285
+ weight_decay:
286
+ - scheduler:
287
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
288
+ value: 0.1
289
+ - scheduler:
290
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
291
+ value: 0.0
292
+ param_names:
293
+ - '*bias*'
294
+ module_cls_names:
295
+ - torch.nn.LayerNorm
296
+ loss:
297
+ all:
298
+ _target_: training.loss_fns.MultiStepMultiMasksAndIous
299
+ weight_dict:
300
+ loss_mask: 20
301
+ loss_dice: 1
302
+ loss_iou: 1
303
+ loss_class: 1
304
+ supervise_all_iou: true
305
+ iou_use_l1_loss: true
306
+ pred_obj_scores: true
307
+ focal_alpha_obj_score: -1.0
308
+ focal_gamma_obj_score: 0
309
+ launcher:
310
+ num_nodes: 1
311
+ gpus_per_node: 8
312
+ experiment_log_dir: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/code/Med/TumorSeg/logs/train/sam2.1/hiera_t/organ_tumor_mask_box_point_512
313
+ submitit:
314
+ partition: null
315
+ account: null
316
+ qos: null
317
+ cpus_per_task: 10
318
+ use_cluster: false
319
+ timeout_hour: 24
320
+ name: null
321
+ port_range:
322
+ - 10000
323
+ - 65000