AnmolSharma21 committed on
Commit
f587157
·
verified ·
1 Parent(s): b484572

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
debug.log ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-23 14:34:32,736] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:26511] baseline 0.000GB ()
2
+ [2026-03-23 14:34:32,739] [INFO] [axolotl.cli.config.load_cfg:341] [PID:26511] config:
3
+ {
4
+ "activation_offloading": false,
5
+ "adapter": "lora",
6
+ "axolotl_config_path": "config.yml",
7
+ "base_model": "Intelligent-Internet/II-Medical-8B",
8
+ "base_model_config": "Intelligent-Internet/II-Medical-8B",
9
+ "batch_size": 16,
10
+ "bf16": true,
11
+ "capabilities": {
12
+ "bf16": true,
13
+ "compute_capability": "sm_100",
14
+ "fp8": true,
15
+ "n_gpu": 1,
16
+ "n_node": 1,
17
+ "tf32": true
18
+ },
19
+ "chat_template": "tokenizer_default",
20
+ "context_parallel_size": 1,
21
+ "dataloader_num_workers": 1,
22
+ "dataloader_pin_memory": true,
23
+ "dataloader_prefetch_factor": 256,
24
+ "dataset_num_proc": 24,
25
+ "dataset_prepared_path": "last_run_prepared",
26
+ "datasets": [
27
+ {
28
+ "message_property_mappings": {
29
+ "content": "content",
30
+ "role": "role"
31
+ },
32
+ "path": "ruslanmv/HealthCareMagic-100k",
33
+ "trust_remote_code": false,
34
+ "type": "alpaca"
35
+ },
36
+ {
37
+ "message_property_mappings": {
38
+ "content": "content",
39
+ "role": "role"
40
+ },
41
+ "path": "medalpaca/medical_meadow_mediqa",
42
+ "trust_remote_code": false,
43
+ "type": "alpaca"
44
+ },
45
+ {
46
+ "message_property_mappings": {
47
+ "content": "content",
48
+ "role": "role"
49
+ },
50
+ "path": "medalpaca/medical_meadow_medical_flashcards",
51
+ "trust_remote_code": false,
52
+ "type": "alpaca"
53
+ },
54
+ {
55
+ "message_property_mappings": {
56
+ "content": "content",
57
+ "role": "role"
58
+ },
59
+ "path": "ruslanmv/icliniq-7k",
60
+ "trust_remote_code": false,
61
+ "type": {
62
+ "field_instruction": "input",
63
+ "field_output": "answer_icliniq",
64
+ "format": "{instruction}",
65
+ "no_input_format": "{instruction}",
66
+ "system_prompt": "You are a helpful medical assistant."
67
+ }
68
+ },
69
+ {
70
+ "message_property_mappings": {
71
+ "content": "content",
72
+ "role": "role"
73
+ },
74
+ "path": "keivalya/MedQuad-MedicalQnADataset",
75
+ "trust_remote_code": false,
76
+ "type": {
77
+ "field_instruction": "Question",
78
+ "field_output": "Answer",
79
+ "format": "{instruction}",
80
+ "no_input_format": "{instruction}",
81
+ "system_prompt": "You are a helpful medical assistant."
82
+ }
83
+ },
84
+ {
85
+ "message_property_mappings": {
86
+ "content": "content",
87
+ "role": "role"
88
+ },
89
+ "path": "mohammad2928git/complete_medical_symptom_dataset",
90
+ "trust_remote_code": false,
91
+ "type": {
92
+ "field_instruction": "text",
93
+ "field_output": "Name",
94
+ "format": "{instruction}",
95
+ "no_input_format": "{instruction}",
96
+ "system_prompt": "You are a helpful medical diagnostic assistant. Based on the patient's symptoms, identify the most likely condition."
97
+ }
98
+ },
99
+ {
100
+ "field": "page_text",
101
+ "message_property_mappings": {
102
+ "content": "content",
103
+ "role": "role"
104
+ },
105
+ "path": "gamino/wiki_medical_terms",
106
+ "trust_remote_code": false,
107
+ "type": "completion"
108
+ }
109
+ ],
110
+ "ddp": false,
111
+ "device": "cuda:0",
112
+ "device_map": "auto",
113
+ "dion_rank_fraction": 1.0,
114
+ "dion_rank_multiple_of": 1,
115
+ "eaft_alpha": 1.0,
116
+ "eaft_k": 20,
117
+ "env_capabilities": {
118
+ "torch_version": "2.9.1"
119
+ },
120
+ "eval_batch_size": 8,
121
+ "eval_causal_lm_metrics": [
122
+ "sacrebleu",
123
+ "comet",
124
+ "ter",
125
+ "chrf"
126
+ ],
127
+ "eval_max_new_tokens": 128,
128
+ "eval_sample_packing": false,
129
+ "eval_steps": 0.08333333333333333,
130
+ "eval_table_size": 0,
131
+ "evals_per_epoch": 4,
132
+ "experimental_skip_move_to_device": true,
133
+ "flash_attention": false,
134
+ "fp16": false,
135
+ "generate_samples": false,
136
+ "generation_do_sample": true,
137
+ "generation_max_new_tokens": 50,
138
+ "generation_prompt_ratio": 0.5,
139
+ "generation_temperature": 0.7,
140
+ "gradient_accumulation_steps": 1,
141
+ "gradient_checkpointing": true,
142
+ "gradient_checkpointing_kwargs": {
143
+ "use_reentrant": true
144
+ },
145
+ "group_by_length": false,
146
+ "include_tkps": true,
147
+ "is_falcon_derived_model": false,
148
+ "is_llama_derived_model": false,
149
+ "is_mistral_derived_model": false,
150
+ "learning_rate": 0.0002,
151
+ "lisa_layers_attribute": "model.layers",
152
+ "load_best_model_at_end": false,
153
+ "load_in_4bit": false,
154
+ "load_in_8bit": false,
155
+ "local_rank": 0,
156
+ "logging_steps": 1,
157
+ "lora_alpha": 16,
158
+ "lora_dropout": 0.05,
159
+ "lora_model_dir": "./medical-llm-out",
160
+ "lora_r": 32,
161
+ "lora_target_modules": [
162
+ "q_proj",
163
+ "v_proj",
164
+ "k_proj",
165
+ "o_proj",
166
+ "gate_proj",
167
+ "down_proj",
168
+ "up_proj"
169
+ ],
170
+ "loraplus_lr_embedding": 1e-06,
171
+ "lr_scheduler": "cosine",
172
+ "mean_resizing_embeddings": false,
173
+ "merge_lora": true,
174
+ "micro_batch_size": 16,
175
+ "model_config_type": "qwen3",
176
+ "num_epochs": 3.0,
177
+ "num_generation_samples": 3,
178
+ "optimizer": "paged_adamw_32bit",
179
+ "otel_metrics_host": "localhost",
180
+ "otel_metrics_port": 8000,
181
+ "output_dir": "./medical-llm-merged",
182
+ "pad_to_sequence_len": true,
183
+ "pretrain_multipack_attn": true,
184
+ "profiler_steps_start": 0,
185
+ "qlora_sharded_model_loading": false,
186
+ "quantize_moe_experts": false,
187
+ "ray_num_workers": 1,
188
+ "remove_unused_columns": false,
189
+ "resources_per_worker": {
190
+ "GPU": 1
191
+ },
192
+ "sample_packing": true,
193
+ "sample_packing_bin_size": 200,
194
+ "sample_packing_group_size": 100000,
195
+ "save_only_model": false,
196
+ "save_safetensors": true,
197
+ "save_steps": 0.3333333333333333,
198
+ "saves_per_epoch": 1,
199
+ "sequence_len": 4096,
200
+ "shuffle_before_merging_datasets": false,
201
+ "shuffle_merged_datasets": true,
202
+ "skip_prepare_dataset": false,
203
+ "streaming_multipack_buffer_size": 10000,
204
+ "strict": false,
205
+ "tensor_parallel_size": 1,
206
+ "tf32": true,
207
+ "tiled_mlp_use_original_mlp": true,
208
+ "tokenizer_config": "Intelligent-Internet/II-Medical-8B",
209
+ "tokenizer_save_jinja_files": true,
210
+ "tokenizer_type": "AutoTokenizer",
211
+ "torch_dtype": "torch.bfloat16",
212
+ "train_on_inputs": false,
213
+ "trl": {
214
+ "async_prefetch": false,
215
+ "log_completions": false,
216
+ "mask_truncated_completions": false,
217
+ "ref_model_mixup_alpha": 0.9,
218
+ "ref_model_sync_steps": 64,
219
+ "replay_buffer_size": 0,
220
+ "replay_recompute_logps": true,
221
+ "reroll_max_groups": 1,
222
+ "reroll_start_fraction": 1.0,
223
+ "reward_num_workers": 1,
224
+ "scale_rewards": true,
225
+ "skip_zero_advantage_batches": true,
226
+ "sync_ref_model": false,
227
+ "use_data_producer": false,
228
+ "use_vllm": false,
229
+ "vllm_lora_sync": false,
230
+ "vllm_server_host": "0.0.0.0",
231
+ "vllm_server_port": 8000
232
+ },
233
+ "type_of_model": "AutoModelForCausalLM",
234
+ "use_otel_metrics": false,
235
+ "use_ray": false,
236
+ "val_set_size": 0.05,
237
+ "vllm": {
238
+ "device": "auto",
239
+ "dtype": "auto",
240
+ "gpu_memory_utilization": 0.9,
241
+ "host": "0.0.0.0",
242
+ "port": 8000
243
+ },
244
+ "warmup_steps": 10,
245
+ "weight_decay": 0.0,
246
+ "world_size": 1
247
+ }
248
+ [2026-03-23 14:34:32,740] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:40] [PID:26511] loading tokenizer... Intelligent-Internet/II-Medical-8B
249
+ [2026-03-23 14:34:34,848] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:299] [PID:26511] EOS: 151645 / <|im_end|>
250
+ [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:300] [PID:26511] BOS: None / None
251
+ [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:301] [PID:26511] PAD: 151643 / <|endoftext|>
252
+ [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:302] [PID:26511] UNK: None / None
253
+ [2026-03-23 14:34:34,849] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:43] [PID:26511] loading model...
254
+ [2026-03-23 14:34:35,028] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:91] [PID:26511] Patched Trainer.evaluation_loop with nanmean loss calculation
255
+ [2026-03-23 14:34:35,033] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:142] [PID:26511] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
256
+ [2026-03-23 14:34:35,035] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:402] [PID:26511] Applying multipack dataloader patch for sample packing...
257
+
258
+ [2026-03-23 14:35:06,408] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:361] [PID:26511] Converting modules to torch.bfloat16
259
+ [2026-03-23 14:35:06,416] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] Memory usage after model load 18.735GB (+18.735GB allocated, +19.895GB reserved)
260
+ [2026-03-23 14:35:06,417] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:26511] Loading pretrained PEFT - LoRA
261
+ trainable params: 87,293,952 || all params: 8,278,029,312 || trainable%: 1.0545
262
+ [2026-03-23 14:35:24,275] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] after adapters 15.907GB (+15.907GB allocated, +20.229GB reserved)
263
+
264
+
merged/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '
2
+ ' + message['content'] | trim + '<|im_end|>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
4
+ ' }}{% endif %}
merged/config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 12288,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 40960,
54
+ "max_window_layers": 36,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
+ "num_key_value_heads": 8,
59
+ "pad_token_id": null,
60
+ "rms_norm_eps": 1e-06,
61
+ "rope_parameters": {
62
+ "rope_theta": 1000000,
63
+ "rope_type": "default"
64
+ },
65
+ "sliding_window": null,
66
+ "tie_word_embeddings": false,
67
+ "transformers_version": "5.3.0",
68
+ "use_cache": true,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
merged/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": 151645,
6
+ "transformers_version": "5.3.0"
7
+ }
merged/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76404fea37abc0175f836dea4788ca44c7912a414a7814d91a7a2daabde5ccd6
3
+ size 16381517208
merged/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
merged/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "is_local": false,
9
+ "model_max_length": 131072,
10
+ "pad_token": "<|endoftext|>",
11
+ "padding_side": "right",
12
+ "split_special_tokens": false,
13
+ "tokenizer_class": "Qwen2Tokenizer",
14
+ "unk_token": null
15
+ }