[2026-03-23 14:34:32,736] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:26511] baseline 0.000GB ()
[2026-03-23 14:34:32,739] [INFO] [axolotl.cli.config.load_cfg:341] [PID:26511] config:
{
  "activation_offloading": false,
  "adapter": "lora",
  "axolotl_config_path": "config.yml",
  "base_model": "Intelligent-Internet/II-Medical-8B",
  "base_model_config": "Intelligent-Internet/II-Medical-8B",
  "batch_size": 16,
  "bf16": true,
  "capabilities": {
    "bf16": true,
    "compute_capability": "sm_100",
    "fp8": true,
    "n_gpu": 1,
    "n_node": 1,
    "tf32": true
  },
  "chat_template": "tokenizer_default",
  "context_parallel_size": 1,
  "dataloader_num_workers": 1,
  "dataloader_pin_memory": true,
  "dataloader_prefetch_factor": 256,
  "dataset_num_proc": 24,
  "dataset_prepared_path": "last_run_prepared",
  "datasets": [
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "ruslanmv/HealthCareMagic-100k",
      "trust_remote_code": false,
      "type": "alpaca"
    },
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "medalpaca/medical_meadow_mediqa",
      "trust_remote_code": false,
      "type": "alpaca"
    },
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "medalpaca/medical_meadow_medical_flashcards",
      "trust_remote_code": false,
      "type": "alpaca"
    },
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "ruslanmv/icliniq-7k",
      "trust_remote_code": false,
      "type": {
        "field_instruction": "input",
        "field_output": "answer_icliniq",
        "format": "{instruction}",
        "no_input_format": "{instruction}",
        "system_prompt": "You are a helpful medical assistant."
      }
    },
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "keivalya/MedQuad-MedicalQnADataset",
      "trust_remote_code": false,
      "type": {
        "field_instruction": "Question",
        "field_output": "Answer",
        "format": "{instruction}",
        "no_input_format": "{instruction}",
        "system_prompt": "You are a helpful medical assistant."
      }
    },
    {
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "mohammad2928git/complete_medical_symptom_dataset",
      "trust_remote_code": false,
      "type": {
        "field_instruction": "text",
        "field_output": "Name",
        "format": "{instruction}",
        "no_input_format": "{instruction}",
        "system_prompt": "You are a helpful medical diagnostic assistant. Based on the patient's symptoms, identify the most likely condition."
      }
    },
    {
      "field": "page_text",
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "gamino/wiki_medical_terms",
      "trust_remote_code": false,
      "type": "completion"
    }
  ],
  "ddp": false,
  "device": "cuda:0",
  "device_map": "auto",
  "dion_rank_fraction": 1.0,
  "dion_rank_multiple_of": 1,
  "eaft_alpha": 1.0,
  "eaft_k": 20,
  "env_capabilities": {
    "torch_version": "2.9.1"
  },
  "eval_batch_size": 8,
  "eval_causal_lm_metrics": [
    "sacrebleu",
    "comet",
    "ter",
    "chrf"
  ],
  "eval_max_new_tokens": 128,
  "eval_sample_packing": false,
  "eval_steps": 0.08333333333333333,
  "eval_table_size": 0,
  "evals_per_epoch": 4,
  "experimental_skip_move_to_device": true,
  "flash_attention": false,
  "fp16": false,
  "generate_samples": false,
  "generation_do_sample": true,
  "generation_max_new_tokens": 50,
  "generation_prompt_ratio": 0.5,
  "generation_temperature": 0.7,
  "gradient_accumulation_steps": 1,
  "gradient_checkpointing": true,
  "gradient_checkpointing_kwargs": {
    "use_reentrant": true
  },
  "group_by_length": false,
  "include_tkps": true,
  "is_falcon_derived_model": false,
  "is_llama_derived_model": false,
  "is_mistral_derived_model": false,
  "learning_rate": 0.0002,
  "lisa_layers_attribute": "model.layers",
  "load_best_model_at_end": false,
  "load_in_4bit": false,
  "load_in_8bit": false,
  "local_rank": 0,
  "logging_steps": 1,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "lora_model_dir": "./medical-llm-out",
  "lora_r": 32,
  "lora_target_modules": [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
    "gate_proj",
    "down_proj",
    "up_proj"
  ],
  "loraplus_lr_embedding": 1e-06,
  "lr_scheduler": "cosine",
  "mean_resizing_embeddings": false,
  "merge_lora": true,
  "micro_batch_size": 16,
  "model_config_type": "qwen3",
  "num_epochs": 3.0,
  "num_generation_samples": 3,
  "optimizer": "paged_adamw_32bit",
  "otel_metrics_host": "localhost",
  "otel_metrics_port": 8000,
  "output_dir": "./medical-llm-merged",
  "pad_to_sequence_len": true,
  "pretrain_multipack_attn": true,
  "profiler_steps_start": 0,
  "qlora_sharded_model_loading": false,
  "quantize_moe_experts": false,
  "ray_num_workers": 1,
  "remove_unused_columns": false,
  "resources_per_worker": {
    "GPU": 1
  },
  "sample_packing": true,
  "sample_packing_bin_size": 200,
  "sample_packing_group_size": 100000,
  "save_only_model": false,
  "save_safetensors": true,
  "save_steps": 0.3333333333333333,
  "saves_per_epoch": 1,
  "sequence_len": 4096,
  "shuffle_before_merging_datasets": false,
  "shuffle_merged_datasets": true,
  "skip_prepare_dataset": false,
  "streaming_multipack_buffer_size": 10000,
  "strict": false,
  "tensor_parallel_size": 1,
  "tf32": true,
  "tiled_mlp_use_original_mlp": true,
  "tokenizer_config": "Intelligent-Internet/II-Medical-8B",
  "tokenizer_save_jinja_files": true,
  "tokenizer_type": "AutoTokenizer",
  "torch_dtype": "torch.bfloat16",
  "train_on_inputs": false,
  "trl": {
    "async_prefetch": false,
    "log_completions": false,
    "mask_truncated_completions": false,
    "ref_model_mixup_alpha": 0.9,
    "ref_model_sync_steps": 64,
    "replay_buffer_size": 0,
    "replay_recompute_logps": true,
    "reroll_max_groups": 1,
    "reroll_start_fraction": 1.0,
    "reward_num_workers": 1,
    "scale_rewards": true,
    "skip_zero_advantage_batches": true,
    "sync_ref_model": false,
    "use_data_producer": false,
    "use_vllm": false,
    "vllm_lora_sync": false,
    "vllm_server_host": "0.0.0.0",
    "vllm_server_port": 8000
  },
  "type_of_model": "AutoModelForCausalLM",
  "use_otel_metrics": false,
  "use_ray": false,
  "val_set_size": 0.05,
  "vllm": {
    "device": "auto",
    "dtype": "auto",
    "gpu_memory_utilization": 0.9,
    "host": "0.0.0.0",
    "port": 8000
  },
  "warmup_steps": 10,
  "weight_decay": 0.0,
  "world_size": 1
}
[2026-03-23 14:34:32,740] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:40] [PID:26511] loading tokenizer... Intelligent-Internet/II-Medical-8B
[2026-03-23 14:34:34,848] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:299] [PID:26511] EOS: 151645 / <|im_end|>
[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:300] [PID:26511] BOS: None / None
[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:301] [PID:26511] PAD: 151643 / <|endoftext|>
[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:302] [PID:26511] UNK: None / None
[2026-03-23 14:34:34,849] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:43] [PID:26511] loading model...
[2026-03-23 14:34:35,028] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:91] [PID:26511] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-03-23 14:34:35,033] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:142] [PID:26511] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
[2026-03-23 14:34:35,035] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:402] [PID:26511] Applying multipack dataloader patch for sample packing...
Loading weights:   0%|                                                                                                                                           | 0/399 [00:00<?, ?it/s]Loading weights:   0%|▎                                                                                                                                  | 1/399 [00:02<13:49,  2.09s/it]Loading weights:   1%|▋                                                                                                                                  | 2/399 [00:03<12:50,  1.94s/it]Loading weights:   1%|▉                                                                                                                                  | 3/399 [00:04<07:32,  1.14s/it]Loading weights:   1%|█▋                                                                                                                                 | 5/399 [00:04<03:34,  1.84it/s]Loading weights:   2%|█▉                                                                                                                                 | 6/399 [00:04<02:50,  2.31it/s]Loading weights:   3%|███▉                                                                                                                              | 12/399 [00:04<00:55,  6.96it/s]Loading weights:   4%|████▉                                                                                                                             | 15/399 [00:04<00:45,  8.42it/s]Loading weights:   4%|█████▌                                                                                                                            | 17/399 [00:05<00:48,  7.93it/s]Loading weights:   6%|███████▏                                                                                                                          | 22/399 [00:05<00:28, 13.01it/s]Loading weights:   7%|████████▍                                                                                                                         | 26/399 [00:05<00:27, 13.79it/s]Loading weights:   7%|█████████▍                                                                                                                        | 29/399 [00:05<00:31, 11.58it/s]Loading weights:   9%|███████████                                                                                                                       | 34/399 [00:06<00:23, 15.26it/s]Loading weights:   9%|████████████                                                                                                                      | 37/399 [00:06<00:24, 14.85it/s]Loading weights:  10%|████████████▋                                                                                                                     | 39/399 [00:06<00:32, 11.22it/s]Loading weights:  11%|██████████████                                                                                                                    | 43/399 [00:06<00:23, 14.91it/s]Loading weights:  12%|███████████████▎                                                                                                                  | 47/399 [00:06<00:23, 15.23it/s]Loading weights:  12%|███████████████▉                                                                                                                  | 49/399 [00:07<00:24, 14.13it/s]Loading weights:  13%|████████████████▌                                                                                                                 | 51/399 [00:07<00:25, 13.41it/s]Loading weights:  14%|█████████████████▌                                                                                                                | 54/399 [00:07<00:23, 14.52it/s]Loading weights:  15%|███████████████████▏                                                                                                              | 59/399 [00:07<00:18, 18.32it/s]Loading weights:  15%|███████████████████▊                                                                                                              | 61/399 [00:08<00:26, 12.54it/s]Loading weights:  16%|█████████████████████▏                                                                                                            | 65/399 [00:08<00:20, 16.48it/s]Loading weights:  17%|██████████████████████▍                                                                                                           | 69/399 [00:08<00:21, 15.57it/s]Loading weights:  18%|███████████████████████▏                                                                                                          | 71/399 [00:08<00:23, 14.07it/s]Loading weights:  18%|███████████████████████▊                                                                                                          | 73/399 [00:08<00:23, 13.85it/s]Loading weights:  20%|█████████████████████████▍                                                                                                        | 78/399 [00:08<00:16, 19.26it/s]Loading weights:  20%|██████████████████████████▍                                                                                                       | 81/399 [00:09<00:18, 17.58it/s]Loading weights:  21%|███████████████████████████▎                                                                                                      | 84/399 [00:09<00:22, 14.24it/s]Loading weights:  22%|████████████████████████████▉                                                                                                     | 89/399 [00:09<00:17, 18.01it/s]Loading weights:  23%|█████████████████████████████▉                                                                                                    | 92/399 [00:09<00:18, 16.72it/s]Loading weights:  24%|██████████████████████████████▋                                                                                                   | 94/399 [00:10<00:23, 13.16it/s]Loading weights:  25%|████████████████████████████████▎                                                                                                | 100/399 [00:10<00:15, 19.26it/s]Loading weights:  26%|█████████████████████████████████▎                                                                                               | 103/399 [00:10<00:16, 17.51it/s]Loading weights:  27%|██████████████████████████████████▎                                                                                              | 106/399 [00:10<00:21, 13.95it/s]Loading weights:  28%|███████████████████████████████████▉                                                                                             | 111/399 [00:10<00:15, 18.46it/s]Loading weights:  29%|████████████████████████████████████▊                                                                                            | 114/399 [00:11<00:16, 17.76it/s]Loading weights:  29%|█████████████████████████████████████▊                                                                                           | 117/399 [00:11<00:20, 14.07it/s]Loading weights:  31%|███████████████████████████████████████▍                                                                                         | 122/399 [00:11<00:15, 17.70it/s]Loading weights:  31%|████████████████████████████████████████▍                                                                                        | 125/399 [00:11<00:16, 17.12it/s]Loading weights:  32%|█████████████████████████████████████████                                                                                        | 127/399 [00:12<00:21, 12.64it/s]Loading weights:  33%|███████████████████████████████████████████                                                                                      | 133/399 [00:12<00:15, 17.12it/s]Loading weights:  34%|███████████████████████████████████████████▉                                                                                     | 136/399 [00:12<00:15, 17.44it/s]Loading weights:  35%|████████████████████████████████████████████▌                                                                                    | 138/399 [00:12<00:20, 12.95it/s]Loading weights:  36%|██████████████████████████████████████████████▌                                                                                  | 144/399 [00:12<00:13, 18.92it/s]Loading weights:  37%|███████████████████████████████████████████████▌                                                                                 | 147/399 [00:13<00:13, 18.13it/s]Loading weights:  38%|████████████████████████████████████████████████▍                                                                                | 150/399 [00:13<00:16, 15.06it/s]Loading weights:  39%|██████████████████████████████████████████████████                                                                               | 155/399 [00:13<00:12, 19.94it/s]Loading weights:  40%|███████████████████████████████████████████████████                                                                              | 158/399 [00:13<00:12, 18.82it/s]Loading weights:  40%|████████████████████████████████████████████████████                                                                             | 161/399 [00:14<00:15, 15.02it/s]Loading weights:  41%|█████████████████████████████████████████████████████                                                                            | 164/399 [00:14<00:13, 17.37it/s]Loading weights:  42%|██████████████████████████████████████████████████████▋                                                                          | 169/399 [00:14<00:11, 19.71it/s]Loading weights:  43%|███████████████████████████████████████████████████████▌                                                                         | 172/399 [00:14<00:15, 14.85it/s]Loading weights:  44%|████████████████████████████████████████████████████████▌                                                                        | 175/399 [00:14<00:14, 15.07it/s]Loading weights:  45%|██████████████████████████████████████████████████████████▏                                                                      | 180/399 [00:15<00:11, 18.32it/s]Loading weights:  46%|███████████████████████████████████████████████████████████▏                                                                     | 183/399 [00:15<00:14, 15.37it/s]Loading weights:  47%|████████████████████████████████████████████████████████████▊                                                                    | 188/399 [00:15<00:11, 18.75it/s]Loading weights:  48%|█████████████████████████████████████████████████████████████▊                                                                   | 191/399 [00:15<00:11, 18.55it/s]Loading weights:  49%|██████████████████████████████████████████████████████████████▋                                                                  | 194/399 [00:16<00:14, 14.11it/s]Loading weights:  50%|████████████████████████████████████████████████████████████████                                                                 | 198/399 [00:16<00:11, 17.47it/s]Loading weights:  51%|█████████████████████████████████████████████████████████████████▎                                                               | 202/399 [00:16<00:10, 19.32it/s]Loading weights:  51%|██████████████████████████████████████████████████████████████████▎                                                              | 205/399 [00:16<00:12, 15.11it/s]Loading weights:  53%|███████████████████████████████████████████████████████████████████▉                                                             | 210/399 [00:16<00:09, 20.12it/s]Loading weights:  53%|████████████████████████████████████████████████████████████████████▊                                                            | 213/399 [00:16<00:09, 18.82it/s]Loading weights:  54%|█████████████████████████████████████████████████████████████████████▊                                                           | 216/399 [00:17<00:12, 14.39it/s]Loading weights:  55%|███████████████████████████████████████████████████████████████████████▍                                                         | 221/399 [00:17<00:09, 18.79it/s]Loading weights:  56%|████████████████████████████████████████████████████████████████████████▍                                                        | 224/399 [00:17<00:09, 18.32it/s]Loading weights:  57%|█████████████████████████████████████████████████████████████████████████▍                                                       | 227/399 [00:17<00:12, 14.18it/s]Loading weights:  58%|███████████████████████████████████████████████████████████████████████████                                                      | 232/399 [00:18<00:09, 18.30it/s]Loading weights:  59%|███████████████████████████████████████████████████████████████████████████▉                                                     | 235/399 [00:18<00:09, 17.20it/s]Loading weights:  60%|████████████████████████████████████████████████████████████████████████████▉                                                    | 238/399 [00:18<00:11, 14.37it/s]Loading weights:  61%|██████████████████████████████████████████████████████████████████████████████▌                                                  | 243/399 [00:18<00:08, 18.30it/s]Loading weights:  62%|███████████████████████████████████████████████████████████████████████████████▌                                                 | 246/399 [00:18<00:08, 17.51it/s]Loading weights:  62%|████████████████████████████████████████████████████████████████████████████████▏                                                | 248/399 [00:19<00:11, 13.14it/s]Loading weights:  64%|██████████████████████████████████████████████████████████████████████████████████                                               | 254/399 [00:19<00:07, 19.85it/s]Loading weights:  64%|███████████████████████████████████████████████████████████████████████████████████                                              | 257/399 [00:19<00:08, 17.54it/s]Loading weights:  65%|████████████████████████████████████████████████████████████████████████████████████                                             | 260/399 [00:19<00:09, 14.15it/s]Loading weights:  66%|█████████████████████████████████████████████████████████████████████████████████████▋                                           | 265/399 [00:20<00:07, 18.43it/s]Loading weights:  67%|██████████████████████████████████████████████████████████████████████████████████████▋                                          | 268/399 [00:20<00:07, 17.42it/s]Loading weights:  68%|███████████████████████████████████████████████████████████████████████████████████████▌                                         | 271/399 [00:20<00:09, 14.20it/s]Loading weights:  69%|█████████████████████████████████████████████████████████████████████████████████████████▏                                       | 276/399 [00:20<00:06, 17.80it/s]Loading weights:  70%|██████████████████████████████████████████████████████████████████████████████████████████▏                                      | 279/399 [00:20<00:06, 18.10it/s]Loading weights:  71%|███████████████████████████████████████████████████████████████████████████████████████████▏                                     | 282/399 [00:21<00:08, 14.29it/s]Loading weights:  72%|████████████████████████████████████████████████████████████████████████████████████████████▊                                    | 287/399 [00:21<00:05, 18.72it/s]Loading weights:  73%|█████████████████████████████████████████████████████████████████████████████████████████████▊                                   | 290/399 [00:21<00:05, 18.48it/s]Loading weights:  73%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                  | 293/399 [00:21<00:06, 15.43it/s]Loading weights:  74%|███████████████████████████████████████████████████████████████████████████████████████████████▋                                 | 296/399 [00:22<00:06, 15.84it/s]Loading weights:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████▎                               | 301/399 [00:22<00:05, 18.39it/s]Loading weights:  76%|█████████████████████████████████████████████████████████████████████████████████████████████████▉                               | 303/399 [00:22<00:07, 13.07it/s]Loading weights:  77%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                             | 307/399 [00:22<00:05, 16.92it/s]Loading weights:  78%|████████████████████████████████████████████████████████████████████████████████████████████████████▌                            | 311/399 [00:22<00:05, 16.93it/s]Loading weights:  79%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                           | 314/399 [00:23<00:06, 13.81it/s]Loading weights:  80%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                         | 319/399 [00:23<00:04, 18.11it/s]Loading weights:  81%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍                        | 323/399 [00:23<00:03, 19.10it/s]Loading weights:  82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍                       | 326/399 [00:23<00:04, 16.22it/s]Loading weights:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 331/399 [00:23<00:03, 21.47it/s]Loading weights:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉                     | 334/399 [00:24<00:03, 19.38it/s]Loading weights:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 337/399 [00:24<00:04, 15.30it/s]Loading weights:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 342/399 [00:24<00:02, 19.20it/s]Loading weights:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                 | 345/399 [00:24<00:03, 17.30it/s]Loading weights:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                | 348/399 [00:25<00:03, 14.64it/s]Loading weights:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊               | 352/399 [00:25<00:02, 17.83it/s]Loading weights:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████              | 356/399 [00:25<00:02, 15.13it/s]Loading weights:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 358/399 [00:25<00:02, 14.28it/s]Loading weights:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 364/399 [00:25<00:01, 19.27it/s]Loading weights:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 367/399 [00:26<00:01, 18.23it/s]Loading weights:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 369/399 [00:26<00:02, 13.68it/s]Loading weights:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏       | 375/399 [00:26<00:01, 19.67it/s]Loading weights:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 378/399 [00:26<00:01, 18.52it/s]Loading weights:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 381/399 [00:27<00:01, 14.06it/s]Loading weights:  96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍    | 385/399 [00:27<00:00, 17.55it/s]Loading weights:  97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 389/399 [00:27<00:00, 14.34it/s]Loading weights:  98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 391/399 [00:27<00:00, 13.76it/s]Loading weights:  99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 397/399 [00:27<00:00, 19.89it/s]Loading weights: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 399/399 [00:27<00:00, 14.25it/s]
[2026-03-23 14:35:06,408] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:361] [PID:26511] Converting modules to torch.bfloat16
[2026-03-23 14:35:06,416] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] Memory usage after model load 18.735GB (+18.735GB allocated, +19.895GB reserved)
[2026-03-23 14:35:06,417] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:26511] Loading pretrained PEFT - LoRA
trainable params: 87,293,952 || all params: 8,278,029,312 || trainable%: 1.0545
[2026-03-23 14:35:24,275] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] after adapters 15.907GB (+15.907GB allocated, +20.229GB reserved)
Unloading and merging model:   0%|                                                                                                                               | 0/800 [00:00<?, ?it/s]Unloading and merging model:   1%|█                                                                                                                      | 7/800 [00:00<00:23, 34.37it/s]Unloading and merging model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:00<00:00, 2935.10it/s]
Writing model shards:   0%|                                                                                                                                        | 0/1 [00:00<?, ?it/s]Writing model shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:27<00:00, 27.68s/it]Writing model shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:27<00:00, 27.68s/it]