| [2026-03-23 14:34:32,736] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:26511] baseline 0.000GB () |
| [2026-03-23 14:34:32,739] [INFO] [axolotl.cli.config.load_cfg:341] [PID:26511] config: |
| { |
| "activation_offloading": false, |
| "adapter": "lora", |
| "axolotl_config_path": "config.yml", |
| "base_model": "Intelligent-Internet/II-Medical-8B", |
| "base_model_config": "Intelligent-Internet/II-Medical-8B", |
| "batch_size": 16, |
| "bf16": true, |
| "capabilities": { |
| "bf16": true, |
| "compute_capability": "sm_100", |
| "fp8": true, |
| "n_gpu": 1, |
| "n_node": 1, |
| "tf32": true |
| }, |
| "chat_template": "tokenizer_default", |
| "context_parallel_size": 1, |
| "dataloader_num_workers": 1, |
| "dataloader_pin_memory": true, |
| "dataloader_prefetch_factor": 256, |
| "dataset_num_proc": 24, |
| "dataset_prepared_path": "last_run_prepared", |
| "datasets": [ |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "ruslanmv/HealthCareMagic-100k", |
| "trust_remote_code": false, |
| "type": "alpaca" |
| }, |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "medalpaca/medical_meadow_mediqa", |
| "trust_remote_code": false, |
| "type": "alpaca" |
| }, |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "medalpaca/medical_meadow_medical_flashcards", |
| "trust_remote_code": false, |
| "type": "alpaca" |
| }, |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "ruslanmv/icliniq-7k", |
| "trust_remote_code": false, |
| "type": { |
| "field_instruction": "input", |
| "field_output": "answer_icliniq", |
| "format": "{instruction}", |
| "no_input_format": "{instruction}", |
| "system_prompt": "You are a helpful medical assistant." |
| } |
| }, |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "keivalya/MedQuad-MedicalQnADataset", |
| "trust_remote_code": false, |
| "type": { |
| "field_instruction": "Question", |
| "field_output": "Answer", |
| "format": "{instruction}", |
| "no_input_format": "{instruction}", |
| "system_prompt": "You are a helpful medical assistant." |
| } |
| }, |
| { |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "mohammad2928git/complete_medical_symptom_dataset", |
| "trust_remote_code": false, |
| "type": { |
| "field_instruction": "text", |
| "field_output": "Name", |
| "format": "{instruction}", |
| "no_input_format": "{instruction}", |
| "system_prompt": "You are a helpful medical diagnostic assistant. Based on the patient's symptoms, identify the most likely condition." |
| } |
| }, |
| { |
| "field": "page_text", |
| "message_property_mappings": { |
| "content": "content", |
| "role": "role" |
| }, |
| "path": "gamino/wiki_medical_terms", |
| "trust_remote_code": false, |
| "type": "completion" |
| } |
| ], |
| "ddp": false, |
| "device": "cuda:0", |
| "device_map": "auto", |
| "dion_rank_fraction": 1.0, |
| "dion_rank_multiple_of": 1, |
| "eaft_alpha": 1.0, |
| "eaft_k": 20, |
| "env_capabilities": { |
| "torch_version": "2.9.1" |
| }, |
| "eval_batch_size": 8, |
| "eval_causal_lm_metrics": [ |
| "sacrebleu", |
| "comet", |
| "ter", |
| "chrf" |
| ], |
| "eval_max_new_tokens": 128, |
| "eval_sample_packing": false, |
| "eval_steps": 0.08333333333333333, |
| "eval_table_size": 0, |
| "evals_per_epoch": 4, |
| "experimental_skip_move_to_device": true, |
| "flash_attention": false, |
| "fp16": false, |
| "generate_samples": false, |
| "generation_do_sample": true, |
| "generation_max_new_tokens": 50, |
| "generation_prompt_ratio": 0.5, |
| "generation_temperature": 0.7, |
| "gradient_accumulation_steps": 1, |
| "gradient_checkpointing": true, |
| "gradient_checkpointing_kwargs": { |
| "use_reentrant": true |
| }, |
| "group_by_length": false, |
| "include_tkps": true, |
| "is_falcon_derived_model": false, |
| "is_llama_derived_model": false, |
| "is_mistral_derived_model": false, |
| "learning_rate": 0.0002, |
| "lisa_layers_attribute": "model.layers", |
| "load_best_model_at_end": false, |
| "load_in_4bit": false, |
| "load_in_8bit": false, |
| "local_rank": 0, |
| "logging_steps": 1, |
| "lora_alpha": 16, |
| "lora_dropout": 0.05, |
| "lora_model_dir": "./medical-llm-out", |
| "lora_r": 32, |
| "lora_target_modules": [ |
| "q_proj", |
| "v_proj", |
| "k_proj", |
| "o_proj", |
| "gate_proj", |
| "down_proj", |
| "up_proj" |
| ], |
| "loraplus_lr_embedding": 1e-06, |
| "lr_scheduler": "cosine", |
| "mean_resizing_embeddings": false, |
| "merge_lora": true, |
| "micro_batch_size": 16, |
| "model_config_type": "qwen3", |
| "num_epochs": 3.0, |
| "num_generation_samples": 3, |
| "optimizer": "paged_adamw_32bit", |
| "otel_metrics_host": "localhost", |
| "otel_metrics_port": 8000, |
| "output_dir": "./medical-llm-merged", |
| "pad_to_sequence_len": true, |
| "pretrain_multipack_attn": true, |
| "profiler_steps_start": 0, |
| "qlora_sharded_model_loading": false, |
| "quantize_moe_experts": false, |
| "ray_num_workers": 1, |
| "remove_unused_columns": false, |
| "resources_per_worker": { |
| "GPU": 1 |
| }, |
| "sample_packing": true, |
| "sample_packing_bin_size": 200, |
| "sample_packing_group_size": 100000, |
| "save_only_model": false, |
| "save_safetensors": true, |
| "save_steps": 0.3333333333333333, |
| "saves_per_epoch": 1, |
| "sequence_len": 4096, |
| "shuffle_before_merging_datasets": false, |
| "shuffle_merged_datasets": true, |
| "skip_prepare_dataset": false, |
| "streaming_multipack_buffer_size": 10000, |
| "strict": false, |
| "tensor_parallel_size": 1, |
| "tf32": true, |
| "tiled_mlp_use_original_mlp": true, |
| "tokenizer_config": "Intelligent-Internet/II-Medical-8B", |
| "tokenizer_save_jinja_files": true, |
| "tokenizer_type": "AutoTokenizer", |
| "torch_dtype": "torch.bfloat16", |
| "train_on_inputs": false, |
| "trl": { |
| "async_prefetch": false, |
| "log_completions": false, |
| "mask_truncated_completions": false, |
| "ref_model_mixup_alpha": 0.9, |
| "ref_model_sync_steps": 64, |
| "replay_buffer_size": 0, |
| "replay_recompute_logps": true, |
| "reroll_max_groups": 1, |
| "reroll_start_fraction": 1.0, |
| "reward_num_workers": 1, |
| "scale_rewards": true, |
| "skip_zero_advantage_batches": true, |
| "sync_ref_model": false, |
| "use_data_producer": false, |
| "use_vllm": false, |
| "vllm_lora_sync": false, |
| "vllm_server_host": "0.0.0.0", |
| "vllm_server_port": 8000 |
| }, |
| "type_of_model": "AutoModelForCausalLM", |
| "use_otel_metrics": false, |
| "use_ray": false, |
| "val_set_size": 0.05, |
| "vllm": { |
| "device": "auto", |
| "dtype": "auto", |
| "gpu_memory_utilization": 0.9, |
| "host": "0.0.0.0", |
| "port": 8000 |
| }, |
| "warmup_steps": 10, |
| "weight_decay": 0.0, |
| "world_size": 1 |
| } |
| [2026-03-23 14:34:32,740] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:40] [PID:26511] loading tokenizer... Intelligent-Internet/II-Medical-8B |
| [2026-03-23 14:34:34,848] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:299] [PID:26511] EOS: 151645 / <|im_end|> |
| [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:300] [PID:26511] BOS: None / None |
| [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:301] [PID:26511] PAD: 151643 / <|endoftext|> |
| [2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:302] [PID:26511] UNK: None / None |
| [2026-03-23 14:34:34,849] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:43] [PID:26511] loading model... |
| [2026-03-23 14:34:35,028] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:91] [PID:26511] Patched Trainer.evaluation_loop with nanmean loss calculation |
| [2026-03-23 14:34:35,033] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:142] [PID:26511] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation |
| [2026-03-23 14:34:35,035] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:402] [PID:26511] Applying multipack dataloader patch for sample packing... |
|
Loading weights: 0%| | 0/399 [00:00<?, ?it/s]
Loading weights: 0%|β | 1/399 [00:02<13:49, 2.09s/it]
Loading weights: 1%|β | 2/399 [00:03<12:50, 1.94s/it]
Loading weights: 1%|β | 3/399 [00:04<07:32, 1.14s/it]
Loading weights: 1%|ββ | 5/399 [00:04<03:34, 1.84it/s]
Loading weights: 2%|ββ | 6/399 [00:04<02:50, 2.31it/s]
Loading weights: 3%|ββββ | 12/399 [00:04<00:55, 6.96it/s]
Loading weights: 4%|βββββ | 15/399 [00:04<00:45, 8.42it/s]
Loading weights: 4%|ββββββ | 17/399 [00:05<00:48, 7.93it/s]
Loading weights: 6%|ββββββββ | 22/399 [00:05<00:28, 13.01it/s]
Loading weights: 7%|βββββββββ | 26/399 [00:05<00:27, 13.79it/s]
Loading weights: 7%|ββββββββββ | 29/399 [00:05<00:31, 11.58it/s]
Loading weights: 9%|βββββββββββ | 34/399 [00:06<00:23, 15.26it/s]
Loading weights: 9%|ββββββββββββ | 37/399 [00:06<00:24, 14.85it/s]
Loading weights: 10%|βββββββββββββ | 39/399 [00:06<00:32, 11.22it/s]
Loading weights: 11%|ββββββββββββββ | 43/399 [00:06<00:23, 14.91it/s]
Loading weights: 12%|ββββββββββββββββ | 47/399 [00:06<00:23, 15.23it/s]
Loading weights: 12%|ββββββββββββββββ | 49/399 [00:07<00:24, 14.13it/s]
Loading weights: 13%|βββββββββββββββββ | 51/399 [00:07<00:25, 13.41it/s]
Loading weights: 14%|ββββββββββββββββββ | 54/399 [00:07<00:23, 14.52it/s]
Loading weights: 15%|ββββββββββββββββββββ | 59/399 [00:07<00:18, 18.32it/s]
Loading weights: 15%|ββββββββββββββββββββ | 61/399 [00:08<00:26, 12.54it/s]
Loading weights: 16%|ββββββββββββββββββββββ | 65/399 [00:08<00:20, 16.48it/s]
Loading weights: 17%|βββββββββββββββββββββββ | 69/399 [00:08<00:21, 15.57it/s]
Loading weights: 18%|ββββββββββββββββββββββββ | 71/399 [00:08<00:23, 14.07it/s]
Loading weights: 18%|ββββββββββββββββββββββββ | 73/399 [00:08<00:23, 13.85it/s]
Loading weights: 20%|ββββββββββββββββββββββββββ | 78/399 [00:08<00:16, 19.26it/s]
Loading weights: 20%|βββββββββββββββββββββββββββ | 81/399 [00:09<00:18, 17.58it/s]
Loading weights: 21%|ββββββββββββββββββββββββββββ | 84/399 [00:09<00:22, 14.24it/s]
Loading weights: 22%|βββββββββββββββββββββββββββββ | 89/399 [00:09<00:17, 18.01it/s]
Loading weights: 23%|ββββββββββββββββββββββββββββββ | 92/399 [00:09<00:18, 16.72it/s]
Loading weights: 24%|βββββββββββββββββββββββββββββββ | 94/399 [00:10<00:23, 13.16it/s]
Loading weights: 25%|βββββββββββββββββββββββββββββββββ | 100/399 [00:10<00:15, 19.26it/s]
Loading weights: 26%|ββββββββββββββββββββββββββββββββββ | 103/399 [00:10<00:16, 17.51it/s]
Loading weights: 27%|βββββββββββββββββββββββββββββββββββ | 106/399 [00:10<00:21, 13.95it/s]
Loading weights: 28%|ββββββββββββββββββββββββββββββββββββ | 111/399 [00:10<00:15, 18.46it/s]
Loading weights: 29%|βββββββββββββββββββββββββββββββββββββ | 114/399 [00:11<00:16, 17.76it/s]
Loading weights: 29%|ββββββββββββββββββββββββββββββββββββββ | 117/399 [00:11<00:20, 14.07it/s]
Loading weights: 31%|ββββββββββββββββββββββββββββββββββββββββ | 122/399 [00:11<00:15, 17.70it/s]
Loading weights: 31%|βββββββββββββββββββββββββββββββββββββββββ | 125/399 [00:11<00:16, 17.12it/s]
Loading weights: 32%|βββββββββββββββββββββββββββββββββββββββββ | 127/399 [00:12<00:21, 12.64it/s]
Loading weights: 33%|βββββββββββββββββββββββββββββββββββββββββββ | 133/399 [00:12<00:15, 17.12it/s]
Loading weights: 34%|ββββββββββββββββββββββββββββββββββββββββββββ | 136/399 [00:12<00:15, 17.44it/s]
Loading weights: 35%|βββββββββββββββββββββββββββββββββββββββββββββ | 138/399 [00:12<00:20, 12.95it/s]
Loading weights: 36%|βββββββββββββββββββββββββββββββββββββββββββββββ | 144/399 [00:12<00:13, 18.92it/s]
Loading weights: 37%|ββββββββββββββββββββββββββββββββββββββββββββββββ | 147/399 [00:13<00:13, 18.13it/s]
Loading weights: 38%|βββββββββββββββββββββββββββββββββββββββββββββββββ | 150/399 [00:13<00:16, 15.06it/s]
Loading weights: 39%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 155/399 [00:13<00:12, 19.94it/s]
Loading weights: 40%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 158/399 [00:13<00:12, 18.82it/s]
Loading weights: 40%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 161/399 [00:14<00:15, 15.02it/s]
Loading weights: 41%|βββββββββββββββββββββββββββββββββββββββββββββββββββββ | 164/399 [00:14<00:13, 17.37it/s]
Loading weights: 42%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 169/399 [00:14<00:11, 19.71it/s]
Loading weights: 43%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 172/399 [00:14<00:15, 14.85it/s]
Loading weights: 44%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 175/399 [00:14<00:14, 15.07it/s]
Loading weights: 45%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 180/399 [00:15<00:11, 18.32it/s]
Loading weights: 46%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 183/399 [00:15<00:14, 15.37it/s]
Loading weights: 47%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 188/399 [00:15<00:11, 18.75it/s]
Loading weights: 48%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 191/399 [00:15<00:11, 18.55it/s]
Loading weights: 49%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 194/399 [00:16<00:14, 14.11it/s]
Loading weights: 50%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 198/399 [00:16<00:11, 17.47it/s]
Loading weights: 51%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 202/399 [00:16<00:10, 19.32it/s]
Loading weights: 51%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 205/399 [00:16<00:12, 15.11it/s]
Loading weights: 53%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 210/399 [00:16<00:09, 20.12it/s]
Loading weights: 53%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 213/399 [00:16<00:09, 18.82it/s]
Loading weights: 54%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 216/399 [00:17<00:12, 14.39it/s]
Loading weights: 55%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 221/399 [00:17<00:09, 18.79it/s]
Loading weights: 56%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 224/399 [00:17<00:09, 18.32it/s]
Loading weights: 57%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 227/399 [00:17<00:12, 14.18it/s]
Loading weights: 58%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 232/399 [00:18<00:09, 18.30it/s]
Loading weights: 59%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 235/399 [00:18<00:09, 17.20it/s]
Loading weights: 60%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 238/399 [00:18<00:11, 14.37it/s]
Loading weights: 61%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 243/399 [00:18<00:08, 18.30it/s]
Loading weights: 62%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 246/399 [00:18<00:08, 17.51it/s]
Loading weights: 62%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 248/399 [00:19<00:11, 13.14it/s]
Loading weights: 64%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 254/399 [00:19<00:07, 19.85it/s]
Loading weights: 64%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 257/399 [00:19<00:08, 17.54it/s]
Loading weights: 65%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 260/399 [00:19<00:09, 14.15it/s]
Loading weights: 66%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 265/399 [00:20<00:07, 18.43it/s]
Loading weights: 67%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 268/399 [00:20<00:07, 17.42it/s]
Loading weights: 68%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 271/399 [00:20<00:09, 14.20it/s]
Loading weights: 69%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 276/399 [00:20<00:06, 17.80it/s]
Loading weights: 70%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 279/399 [00:20<00:06, 18.10it/s]
Loading weights: 71%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 282/399 [00:21<00:08, 14.29it/s]
Loading weights: 72%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 287/399 [00:21<00:05, 18.72it/s]
Loading weights: 73%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 290/399 [00:21<00:05, 18.48it/s]
Loading weights: 73%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 293/399 [00:21<00:06, 15.43it/s]
Loading weights: 74%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 296/399 [00:22<00:06, 15.84it/s]
Loading weights: 75%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 301/399 [00:22<00:05, 18.39it/s]
Loading weights: 76%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 303/399 [00:22<00:07, 13.07it/s]
Loading weights: 77%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 307/399 [00:22<00:05, 16.92it/s]
Loading weights: 78%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 311/399 [00:22<00:05, 16.93it/s]
Loading weights: 79%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 314/399 [00:23<00:06, 13.81it/s]
Loading weights: 80%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 319/399 [00:23<00:04, 18.11it/s]
Loading weights: 81%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 323/399 [00:23<00:03, 19.10it/s]
Loading weights: 82%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 326/399 [00:23<00:04, 16.22it/s]
Loading weights: 83%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 331/399 [00:23<00:03, 21.47it/s]
Loading weights: 84%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 334/399 [00:24<00:03, 19.38it/s]
Loading weights: 84%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 337/399 [00:24<00:04, 15.30it/s]
Loading weights: 86%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 342/399 [00:24<00:02, 19.20it/s]
Loading weights: 86%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 345/399 [00:24<00:03, 17.30it/s]
Loading weights: 87%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 348/399 [00:25<00:03, 14.64it/s]
Loading weights: 88%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 352/399 [00:25<00:02, 17.83it/s]
Loading weights: 89%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 356/399 [00:25<00:02, 15.13it/s]
Loading weights: 90%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 358/399 [00:25<00:02, 14.28it/s]
Loading weights: 91%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 364/399 [00:25<00:01, 19.27it/s]
Loading weights: 92%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 367/399 [00:26<00:01, 18.23it/s]
Loading weights: 92%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 369/399 [00:26<00:02, 13.68it/s]
Loading weights: 94%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 375/399 [00:26<00:01, 19.67it/s]
Loading weights: 95%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 378/399 [00:26<00:01, 18.52it/s]
Loading weights: 95%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 381/399 [00:27<00:01, 14.06it/s]
Loading weights: 96%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 385/399 [00:27<00:00, 17.55it/s]
Loading weights: 97%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 389/399 [00:27<00:00, 14.34it/s]
Loading weights: 98%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 391/399 [00:27<00:00, 13.76it/s]
Loading weights: 99%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 397/399 [00:27<00:00, 19.89it/s]
Loading weights: 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 399/399 [00:27<00:00, 14.25it/s] |
| [2026-03-23 14:35:06,408] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:361] [PID:26511] Converting modules to torch.bfloat16 |
| [2026-03-23 14:35:06,416] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] Memory usage after model load 18.735GB (+18.735GB allocated, +19.895GB reserved) |
| [2026-03-23 14:35:06,417] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:26511] Loading pretrained PEFT - LoRA |
| trainable params: 87,293,952 || all params: 8,278,029,312 || trainable%: 1.0545 |
| [2026-03-23 14:35:24,275] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] after adapters 15.907GB (+15.907GB allocated, +20.229GB reserved) |
|
Unloading and merging model: 0%| | 0/800 [00:00<?, ?it/s]
Unloading and merging model: 1%|β | 7/800 [00:00<00:23, 34.37it/s]
Unloading and merging model: 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 800/800 [00:00<00:00, 2935.10it/s] |
|
Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]
Writing model shards: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:27<00:00, 27.68s/it]
Writing model shards: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:27<00:00, 27.68s/it] |
|
|