Theia-4869 committed on
Commit
5807536
·
verified ·
1 Parent(s): a4ec7ef

Upload one_512_2d/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. one_512_2d/config.yaml +307 -0
one_512_2d/config.yaml ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scratch:
2
+ resolution: 512
3
+ batch_size: 64
4
+ num_workers: 10
5
+ num_frames: 1
6
+ max_num_objects: 1
7
+ base_lr: 5.0e-05
8
+ vision_lr: 3.0e-05
9
+ phases_per_epoch: 1
10
+ num_epochs: 40
11
+ dataset:
12
+ data_folder: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor/internal/train
13
+ metadata_file: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/data/TumorSegDB/v2.0/tumor.jsonl
14
+ multiplier: 1
15
+ vos:
16
+ transforms:
17
+ - _target_: training.dataset.transforms.ComposeAPI
18
+ transforms:
19
+ - _target_: training.dataset.transforms.RandomResizeAPI
20
+ sizes: ${scratch.resolution}
21
+ square: true
22
+ consistent_transform: true
23
+ - _target_: training.dataset.transforms.ToTensorAPI
24
+ - _target_: training.dataset.transforms.NormalizeAPI
25
+ mean:
26
+ - 0.485
27
+ - 0.456
28
+ - 0.406
29
+ std:
30
+ - 0.229
31
+ - 0.224
32
+ - 0.225
33
+ trainer:
34
+ _target_: training.trainer.Trainer
35
+ mode: train_only
36
+ max_epochs: ${times:${scratch.num_epochs},${scratch.phases_per_epoch}}
37
+ accelerator: cuda
38
+ seed_value: 123
39
+ data:
40
+ train:
41
+ _target_: training.dataset.sam2_datasets.TorchTrainMixedDataset
42
+ phases_per_epoch: ${scratch.phases_per_epoch}
43
+ batch_sizes:
44
+ - ${scratch.batch_size}
45
+ datasets:
46
+ - _target_: training.dataset.utils.RepeatFactorWrapper
47
+ dataset:
48
+ _target_: training.dataset.utils.ConcatDataset
49
+ datasets:
50
+ - _target_: training.dataset.vos_dataset.VOSDataset
51
+ transforms: ${vos.transforms}
52
+ training: true
53
+ video_dataset:
54
+ _target_: training.dataset.vos_raw_dataset.NPZRawDataset
55
+ data_folder: ${dataset.data_folder}
56
+ metadata_file: ${dataset.metadata_file}
57
+ dimension: 2
58
+ num_frames: ${scratch.num_frames}
59
+ sampler:
60
+ _target_: training.dataset.vos_sampler.RandomUniformSampler
61
+ num_frames: ${scratch.num_frames}
62
+ max_num_objects: ${scratch.max_num_objects}
63
+ multiplier: ${dataset.multiplier}
64
+ shuffle: true
65
+ num_workers: ${scratch.num_workers}
66
+ pin_memory: true
67
+ drop_last: true
68
+ collate_fn:
69
+ _target_: training.utils.data_utils.collate_fn
70
+ _partial_: true
71
+ dict_key: all
72
+ model:
73
+ _target_: training.model.sam2.SAM2Train
74
+ image_encoder:
75
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
76
+ scalp: 1
77
+ trunk:
78
+ _target_: sam2.modeling.backbones.hieradet.Hiera
79
+ embed_dim: 96
80
+ num_heads: 1
81
+ stages:
82
+ - 1
83
+ - 2
84
+ - 7
85
+ - 2
86
+ global_att_blocks:
87
+ - 5
88
+ - 7
89
+ - 9
90
+ window_pos_embed_bkg_spatial_size:
91
+ - 7
92
+ - 7
93
+ neck:
94
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
95
+ position_encoding:
96
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
97
+ num_pos_feats: 256
98
+ normalize: true
99
+ scale: null
100
+ temperature: 10000
101
+ d_model: 256
102
+ backbone_channel_list:
103
+ - 768
104
+ - 384
105
+ - 192
106
+ - 96
107
+ fpn_top_down_levels:
108
+ - 2
109
+ - 3
110
+ fpn_interp_model: nearest
111
+ memory_attention:
112
+ _target_: sam2.modeling.memory_attention.MemoryAttention
113
+ d_model: 256
114
+ pos_enc_at_input: true
115
+ layer:
116
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
117
+ activation: relu
118
+ dim_feedforward: 2048
119
+ dropout: 0.1
120
+ pos_enc_at_attn: false
121
+ self_attention:
122
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
123
+ rope_theta: 10000.0
124
+ feat_sizes:
125
+ - 32
126
+ - 32
127
+ embedding_dim: 256
128
+ num_heads: 1
129
+ downsample_rate: 1
130
+ dropout: 0.1
131
+ d_model: 256
132
+ pos_enc_at_cross_attn_keys: true
133
+ pos_enc_at_cross_attn_queries: false
134
+ cross_attention:
135
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
136
+ rope_theta: 10000.0
137
+ feat_sizes:
138
+ - 32
139
+ - 32
140
+ rope_k_repeat: true
141
+ embedding_dim: 256
142
+ num_heads: 1
143
+ downsample_rate: 1
144
+ dropout: 0.1
145
+ kv_in_dim: 64
146
+ num_layers: 4
147
+ memory_encoder:
148
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
149
+ out_dim: 64
150
+ position_encoding:
151
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
152
+ num_pos_feats: 64
153
+ normalize: true
154
+ scale: null
155
+ temperature: 10000
156
+ mask_downsampler:
157
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
158
+ kernel_size: 3
159
+ stride: 2
160
+ padding: 1
161
+ fuser:
162
+ _target_: sam2.modeling.memory_encoder.Fuser
163
+ layer:
164
+ _target_: sam2.modeling.memory_encoder.CXBlock
165
+ dim: 256
166
+ kernel_size: 7
167
+ padding: 3
168
+ layer_scale_init_value: 1.0e-06
169
+ use_dwconv: true
170
+ num_layers: 2
171
+ num_maskmem: 7
172
+ image_size: ${scratch.resolution}
173
+ num_semantic_tokens: 1
174
+ semantic_prompt_level: 0
175
+ sigmoid_scale_for_mem_enc: 20.0
176
+ sigmoid_bias_for_mem_enc: -10.0
177
+ use_mask_input_as_output_without_sam: true
178
+ directly_add_no_mem_embed: true
179
+ no_obj_embed_spatial: true
180
+ use_high_res_features_in_sam: true
181
+ multimask_output_in_sam: true
182
+ iou_prediction_use_sigmoid: true
183
+ use_obj_ptrs_in_encoder: true
184
+ add_tpos_enc_to_obj_ptrs: true
185
+ proj_tpos_enc_in_obj_ptrs: true
186
+ use_signed_tpos_enc_to_obj_ptrs: true
187
+ only_obj_ptrs_in_the_past_for_eval: true
188
+ pred_obj_scores: true
189
+ pred_obj_scores_mlp: true
190
+ fixed_no_obj_ptr: true
191
+ multimask_output_for_tracking: true
192
+ use_multimask_token_for_obj_ptr: true
193
+ multimask_min_pt_num: 0
194
+ multimask_max_pt_num: 1
195
+ use_mlp_for_obj_ptr_proj: true
196
+ prob_to_use_pt_input_for_train: 0.0
197
+ prob_to_use_pt_input_for_eval: 0.0
198
+ prob_to_use_box_input_for_train: 0.5
199
+ prob_to_use_box_input_for_eval: 1.0
200
+ prob_to_use_mask_input_for_train: 0.0
201
+ prob_to_use_mask_input_for_eval: 0.0
202
+ prob_to_sample_from_gt_for_train: 0.1
203
+ num_frames_to_correct_for_train: 2
204
+ num_frames_to_correct_for_eval: 1
205
+ rand_frames_to_correct_for_train: true
206
+ add_all_frames_to_correct_as_cond: true
207
+ num_init_cond_frames_for_train: 2
208
+ rand_init_cond_frames_for_train: true
209
+ num_correction_pt_per_frame: 7
210
+ use_act_ckpt_iterative_pt_sampling: false
211
+ num_init_cond_frames_for_eval: 1
212
+ forward_backbone_per_frame_for_eval: false
213
+ logging:
214
+ tensorboard_writer:
215
+ _target_: training.utils.logger.make_tensorboard_logger
216
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
217
+ flush_secs: 120
218
+ should_log: true
219
+ log_dir: ${launcher.experiment_log_dir}/logs
220
+ log_freq: 10
221
+ checkpoint:
222
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
223
+ save_freq: 5
224
+ model_weight_initializer:
225
+ _partial_: true
226
+ _target_: training.utils.checkpoint_utils.load_state_dict_into_model
227
+ strict: true
228
+ ignore_unexpected_keys: null
229
+ ignore_missing_keys:
230
+ - sam_mask_decoder.semantic_tokens.weight
231
+ state_dict:
232
+ _target_: training.utils.checkpoint_utils.load_checkpoint_and_apply_kernels
233
+ checkpoint_path: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/model/SAM2.1/sam2.1_hiera_tiny.pt
234
+ ckpt_state_dict_keys:
235
+ - model
236
+ distributed:
237
+ backend: nccl
238
+ find_unused_parameters: true
239
+ optim:
240
+ amp:
241
+ enabled: true
242
+ amp_dtype: bfloat16
243
+ optimizer:
244
+ _target_: torch.optim.AdamW
245
+ gradient_clip:
246
+ _target_: training.optimizer.GradientClipper
247
+ max_norm: 0.1
248
+ norm_type: 2
249
+ param_group_modifiers:
250
+ - _target_: training.optimizer.layer_decay_param_modifier
251
+ _partial_: true
252
+ layer_decay_value: 0.9
253
+ apply_to: image_encoder.trunk
254
+ overrides:
255
+ - pattern: '*pos_embed*'
256
+ value: 1.0
257
+ options:
258
+ lr:
259
+ - scheduler:
260
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
261
+ start_value: ${scratch.base_lr}
262
+ end_value: ${divide:${scratch.base_lr},10}
263
+ - scheduler:
264
+ _target_: fvcore.common.param_scheduler.CosineParamScheduler
265
+ start_value: ${scratch.vision_lr}
266
+ end_value: ${divide:${scratch.vision_lr},10}
267
+ param_names:
268
+ - image_encoder.*
269
+ weight_decay:
270
+ - scheduler:
271
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
272
+ value: 0.1
273
+ - scheduler:
274
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
275
+ value: 0.0
276
+ param_names:
277
+ - '*bias*'
278
+ module_cls_names:
279
+ - torch.nn.LayerNorm
280
+ loss:
281
+ all:
282
+ _target_: training.loss_fns.MultiStepMultiMasksAndIous
283
+ weight_dict:
284
+ loss_mask: 20
285
+ loss_dice: 1
286
+ loss_iou: 1
287
+ loss_class: 1
288
+ supervise_all_iou: true
289
+ iou_use_l1_loss: true
290
+ pred_obj_scores: true
291
+ focal_alpha_obj_score: -1.0
292
+ focal_gamma_obj_score: 0
293
+ launcher:
294
+ num_nodes: 1
295
+ gpus_per_node: 8
296
+ experiment_log_dir: /mnt/bn/bes-nas-zqz-lq-v6arnold6/mlx/users/zhangqizhe/code/Med/TumorSeg/logs/train/sam2.1/hiera_t/one_512_2d_
297
+ submitit:
298
+ partition: null
299
+ account: null
300
+ qos: null
301
+ cpus_per_task: 10
302
+ use_cluster: false
303
+ timeout_hour: 24
304
+ name: null
305
+ port_range:
306
+ - 10000
307
+ - 65000