WinstonHu committed on
Commit
6c3e1df
·
verified ·
1 Parent(s): 1871c8a

Upload folder 20250922_053837 to stage_1/token_merging/

Browse files
stage_1/token_merging/20250922_053837.log ADDED
The diff for this file is too large to render. See raw diff
 
stage_1/token_merging/vis_data/20250922_053837.json ADDED
The diff for this file is too large to render. See raw diff
 
stage_1/token_merging/vis_data/config.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SYSTEM = ''
2
+ accumulative_counts = 400
3
+ batch_size = 1
4
+ betas = (
5
+ 0.9,
6
+ 0.999,
7
+ )
8
+ bnb = dict(
9
+ bnb_4bit_compute_dtype='torch.bfloat16',
10
+ bnb_4bit_quant_type='nf4',
11
+ bnb_4bit_use_double_quant=True,
12
+ llm_int8_has_fp16_weight=False,
13
+ llm_int8_threshold=6.0,
14
+ load_in_4bit=True,
15
+ load_in_8bit=False,
16
+ type='transformers.BitsAndBytesConfig')
17
+ custom_hooks = [
18
+ dict(
19
+ tokenizer=dict(
20
+ padding_side='right',
21
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
22
+ trust_remote_code=True,
23
+ type='transformers.AutoTokenizer.from_pretrained'),
24
+ type='xtuner.engine.hooks.DatasetInfoHook'),
25
+ dict(
26
+ evaluation_images=
27
+ '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5',
28
+ evaluation_inputs=[
29
+ 'Are the tumor cells organized in a lobulated pattern within the slide?',
30
+ ],
31
+ every_n_iters=512,
32
+ prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat',
33
+ system='',
34
+ tokenizer=dict(
35
+ padding_side='right',
36
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
37
+ trust_remote_code=True,
38
+ type='transformers.AutoTokenizer.from_pretrained'),
39
+ type='xtuner.engine.hooks.EvaluateChatHook'),
40
+ dict(type='xtuner.engine.hooks.ThroughputHook'),
41
+ ]
42
+ data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage1_morph2.json'
43
+ dataloader_num_workers = 5
44
+ default_hooks = dict(
45
+ checkpoint=dict(
46
+ by_epoch=False,
47
+ interval=5120,
48
+ max_keep_ckpts=8,
49
+ type='mmengine.hooks.CheckpointHook'),
50
+ logger=dict(
51
+ interval=10,
52
+ log_metric_by_epoch=False,
53
+ type='mmengine.hooks.LoggerHook'),
54
+ param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
55
+ sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
56
+ timer=dict(type='mmengine.hooks.IterTimerHook'))
57
+ env_cfg = dict(
58
+ cudnn_benchmark=False,
59
+ dist_cfg=dict(backend='nccl'),
60
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
61
+ evaluation_freq = 512
62
+ evaluation_images = '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5'
63
+ evaluation_inputs = [
64
+ 'Are the tumor cells organized in a lobulated pattern within the slide?',
65
+ ]
66
+ image_path_list = None
67
+ launcher = 'pytorch'
68
+ llava_dataset = dict(
69
+ data_path=
70
+ '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage1_morph2.json',
71
+ dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn',
72
+ identifier='_224x224_b20_t15',
73
+ image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression',
74
+ image_feature_suffix='.h5',
75
+ image_folder='',
76
+ image_path_list=None,
77
+ max_length=15836,
78
+ pad_image_to_square=False,
79
+ per_image_length=10240,
80
+ sample_num=10240,
81
+ sample_strategy='linspace',
82
+ template_map_fn=dict(
83
+ template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat',
84
+ type='xtuner.dataset.map_fns.template_map_fn_factory'),
85
+ tokenizer=dict(
86
+ padding_side='right',
87
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
88
+ trust_remote_code=True,
89
+ type='transformers.AutoTokenizer.from_pretrained'),
90
+ type='xtuner.dataset.LLaVADataset',
91
+ unwanted_prefix_csv=
92
+ '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv'
93
+ )
94
+ llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct'
95
+ load_from = None
96
+ log_level = 'INFO'
97
+ log_processor = dict(
98
+ by_epoch=False,
99
+ mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*',
100
+ window_size=1)
101
+ lr = 0.001
102
+ max_epochs = 2
103
+ max_length = 15836
104
+ max_norm = 1
105
+ model = dict(
106
+ enable_token_merge=True,
107
+ freeze_llm=True,
108
+ llm=dict(
109
+ attn_implementation='flash_attention_2',
110
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
111
+ quantization_config=dict(
112
+ bnb_4bit_compute_dtype='torch.bfloat16',
113
+ bnb_4bit_quant_type='nf4',
114
+ bnb_4bit_use_double_quant=True,
115
+ llm_int8_has_fp16_weight=False,
116
+ llm_int8_threshold=6.0,
117
+ load_in_4bit=True,
118
+ load_in_8bit=False,
119
+ type='transformers.BitsAndBytesConfig'),
120
+ torch_dtype='torch.bfloat16',
121
+ trust_remote_code=True,
122
+ type='transformers.AutoModelForCausalLM.from_pretrained'),
123
+ max_position_embeddings=None,
124
+ train_stage='1',
125
+ type='xtuner.model.llava_no_longnet.LLaVAModel',
126
+ use_perceiver_resampler=False)
127
+ optim_type = 'torch.optim.AdamW'
128
+ optim_wrapper = dict(
129
+ optimizer=dict(
130
+ betas=(
131
+ 0.9,
132
+ 0.999,
133
+ ),
134
+ lr=0.001,
135
+ type='torch.optim.AdamW',
136
+ weight_decay=0.0),
137
+ paramwise_cfg=dict(bias_decay_mult=0.0, norm_decay_mult=0.0),
138
+ type='DeepSpeedOptimWrapper')
139
+ param_scheduler = [
140
+ dict(
141
+ begin=0,
142
+ by_epoch=True,
143
+ convert_to_iter_based=True,
144
+ end=0.1,
145
+ start_factor=0.01,
146
+ type='mmengine.optim.LinearLR'),
147
+ dict(
148
+ begin=0.1,
149
+ by_epoch=True,
150
+ convert_to_iter_based=True,
151
+ end=2,
152
+ eta_min=0.0,
153
+ type='mmengine.optim.CosineAnnealingLR'),
154
+ ]
155
+ per_image_length = 10240
156
+ prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat'
157
+ randomness = dict(deterministic=False, seed=None)
158
+ resume = False
159
+ runner_type = 'FlexibleRunner'
160
+ sample_type = 'wsi'
161
+ save_steps = 5120
162
+ save_total_limit = 8
163
+ seed = 2025
164
+ strategy = dict(
165
+ config=dict(
166
+ bf16=dict(enabled=True),
167
+ fp16=dict(enabled=False, initial_scale_power=16),
168
+ gradient_accumulation_steps='auto',
169
+ gradient_clipping='auto',
170
+ train_micro_batch_size_per_gpu='auto',
171
+ zero_allow_untested_optimizer=True,
172
+ zero_force_ds_cpu_optimizer=False,
173
+ zero_optimization=dict(overlap_comm=False, stage=2)),
174
+ exclude_frozen_parameters=True,
175
+ gradient_accumulation_steps=400,
176
+ gradient_clipping=1,
177
+ sequence_parallel_size=1,
178
+ train_micro_batch_size_per_gpu=1,
179
+ type='xtuner.engine.DeepSpeedStrategy')
180
+ tokenizer = dict(
181
+ padding_side='right',
182
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
183
+ trust_remote_code=True,
184
+ type='transformers.AutoTokenizer.from_pretrained')
185
+ train_cfg = dict(max_epochs=2, type='xtuner.engine.runner.TrainLoop')
186
+ train_dataloader = dict(
187
+ batch_size=1,
188
+ collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
189
+ dataset=dict(
190
+ data_path=
191
+ '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage1_morph2.json',
192
+ dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn',
193
+ identifier='_224x224_b20_t15',
194
+ image_feature_prefix=
195
+ '/mnt/bn/xudong-va/meilong/datasets/Token_Compression',
196
+ image_feature_suffix='.h5',
197
+ image_folder='',
198
+ image_path_list=None,
199
+ max_length=15836,
200
+ pad_image_to_square=False,
201
+ per_image_length=10240,
202
+ sample_num=10240,
203
+ sample_strategy='linspace',
204
+ template_map_fn=dict(
205
+ template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat',
206
+ type='xtuner.dataset.map_fns.template_map_fn_factory'),
207
+ tokenizer=dict(
208
+ padding_side='right',
209
+ pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct',
210
+ trust_remote_code=True,
211
+ type='transformers.AutoTokenizer.from_pretrained'),
212
+ type='xtuner.dataset.LLaVADataset',
213
+ unwanted_prefix_csv=
214
+ '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv'
215
+ ),
216
+ num_workers=5,
217
+ persistent_workers=True,
218
+ pin_memory=True,
219
+ prefetch_factor=4,
220
+ sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler'))
221
+ visualizer = None
222
+ warmup_ratio = 0.05
223
+ weight_decay = 0.0
224
+ work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merging/'
stage_1/token_merging/vis_data/eval_outputs_iter_10239.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Eval output 1:
2
+ <|im_start|>user
3
+ <image>
4
+ Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|>
5
+ <|im_start|>assistant
6
+ Yes, the tumor cells are organized in a lobulated pattern within the slide. The lobules are well-defined and separated by fibrous stroma.<|im_end|>
7
+
8
+
stage_1/token_merging/vis_data/eval_outputs_iter_15359.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Eval output 1:
2
+ <|im_start|>user
3
+ <image>
4
+ Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|>
5
+ <|im_start|>assistant
6
+ Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and separated by fibrous stroma, creating a distinct and organized structure.<|im_end|>
7
+
8
+
stage_1/token_merging/vis_data/eval_outputs_iter_20479.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Eval output 1:
2
+ <|im_start|>user
3
+ <image>
4
+ Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|>
5
+ <|im_start|>assistant
6
+ Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and separated by fibrous stroma, creating a distinct and organized structure.<|im_end|>
7
+
8
+
stage_1/token_merging/vis_data/eval_outputs_iter_23095.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Eval output 1:
2
+ <|im_start|>user
3
+ <image>
4
+ Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|>
5
+ <|im_start|>assistant
6
+ Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and separated by fibrous stroma, creating a distinct and organized structure.<|im_end|>
7
+
8
+
stage_1/token_merging/vis_data/eval_outputs_iter_5119.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Eval output 1:
2
+ <|im_start|>user
3
+ <image>
4
+ Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|>
5
+ <|im_start|>assistant
6
+ Yes, the tumor cells are indeed organized in a lobulated pattern. This arrangement is characterized by the presence of multiple lobes or areas of tumor tissue, each with its own distinct boundaries. The lobules are typically well-defined and separated by fibrous stroma, which contributes to the overall architecture of the lesion.<|im_end|>
7
+
8
+
stage_1/token_merging/vis_data/scalars.json ADDED
The diff for this file is too large to render. See raw diff