File size: 15,768 Bytes
dd11053 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | [2026-02-21 20:37:21,574] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:5467] baseline 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:21,574] [INFO] [axolotl.cli.config.load_cfg:259] [PID:5467] config:
{
"activation_offloading": false,
"adapter": "lora",
"axolotl_config_path": "src/training/qwen3_axolotl_config.yml",
"base_model": "Qwen/Qwen2-0.5B",
"base_model_config": "Qwen/Qwen2-0.5B",
"batch_size": 8,
"bf16": false,
"capabilities": {
"bf16": true,
"fp8": false,
"n_gpu": 1,
"n_node": 1
},
"context_parallel_size": 1,
"dataloader_num_workers": 1,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 256,
"dataset_num_proc": 8,
"dataset_prepared_path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared",
"datasets": [
{
"message_property_mappings": {
"content": "content",
"role": "role"
},
"path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/src/training/data_splits_axolotl/train_axolotl.jsonl",
"trust_remote_code": false,
"type": "alpaca"
}
],
"ddp": false,
"device": "mps",
"dion_rank_fraction": 1.0,
"dion_rank_multiple_of": 1,
"env_capabilities": {
"torch_version": "2.10.0"
},
"eval_batch_size": 1,
"eval_causal_lm_metrics": [
"sacrebleu",
"comet",
"ter",
"chrf"
],
"eval_max_new_tokens": 128,
"eval_table_size": 0,
"experimental_skip_move_to_device": true,
"fp16": false,
"fp8": false,
"gradient_accumulation_steps": 8,
"gradient_checkpointing": true,
"gradient_checkpointing_kwargs": {
"use_reentrant": true
},
"group_by_length": false,
"include_tkps": true,
"is_falcon_derived_model": false,
"is_llama_derived_model": false,
"is_mistral_derived_model": false,
"learning_rate": 0.0002,
"lisa_layers_attribute": "model.layers",
"load_best_model_at_end": false,
"load_in_4bit": true,
"load_in_8bit": false,
"local_rank": 0,
"logging_steps": 10,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_modules_to_save": [
"embed_tokens",
"lm_head"
],
"lora_r": 8,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"loraplus_lr_embedding": 1e-06,
"lr_scheduler": "cosine",
"mean_resizing_embeddings": false,
"micro_batch_size": 1,
"model_config_type": "qwen2",
"num_epochs": 2.0,
"optimizer": "adamw_torch",
"otel_metrics_host": "localhost",
"otel_metrics_port": 8000,
"output_dir": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output",
"pad_to_sequence_len": true,
"pretrain_multipack_attn": true,
"profiler_steps_start": 0,
"qlora_sharded_model_loading": false,
"ray_num_workers": 1,
"resources_per_worker": {
"GPU": 1
},
"sample_packing": false,
"sample_packing_bin_size": 200,
"sample_packing_group_size": 100000,
"save_only_model": false,
"save_safetensors": true,
"save_steps": 15,
"save_total_limit": 2,
"sequence_len": 512,
"shuffle_before_merging_datasets": false,
"shuffle_merged_datasets": true,
"skip_prepare_dataset": false,
"streaming_multipack_buffer_size": 10000,
"strict": false,
"tensor_parallel_size": 1,
"tf32": false,
"tiled_mlp_use_original_mlp": true,
"tokenizer_config": "Qwen/Qwen2-0.5B",
"tokenizer_save_jinja_files": true,
"tokenizer_type": "AutoTokenizer",
"torch_dtype": "torch.float32",
"train_on_inputs": false,
"trl": {
"log_completions": false,
"mask_truncated_completions": false,
"ref_model_mixup_alpha": 0.9,
"ref_model_sync_steps": 64,
"scale_rewards": true,
"sync_ref_model": false,
"use_vllm": false,
"vllm_server_host": "0.0.0.0",
"vllm_server_port": 8000
},
"trust_remote_code": true,
"type_of_model": "AutoModelForCausalLM",
"use_otel_metrics": false,
"use_ray": false,
"val_set_size": 0.0,
"vllm": {
"device": "auto",
"dtype": "auto",
"gpu_memory_utilization": 0.9,
"host": "0.0.0.0",
"port": 8000
},
"wandb_mode": "disabled",
"warmup_steps": 50,
"weight_decay": 0.0,
"world_size": 1
}
[2026-02-21 20:37:22,676] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <|endoftext|>
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <|endoftext|>
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
[2026-02-21 20:37:22,679] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:5467] Loading prepared dataset from disk at /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared/41f31c3c9bc9eb4eb6e943fbbbb74dda...
[2026-02-21 20:37:22,701] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:5467] total_num_tokens: 12_122
[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:5467] `total_supervised_tokens: 1_660`
[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:5467] total_num_steps: 49
[2026-02-21 20:37:22,703] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:5467] Maximum number of steps set at 49
[2026-02-21 20:37:22,735] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:5467] loading tokenizer... Qwen/Qwen2-0.5B
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <|endoftext|>
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <|endoftext|>
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:5467] Loading model
[2026-02-21 20:37:23,742] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:5467] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-02-21 20:37:23,744] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:5467] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
[2026-02-21 20:37:23,752] [WARNING] [transformers.modeling_utils.warning_once:328] [PID:5467] `torch_dtype` is deprecated! Use `dtype` instead!
[2026-02-21 20:37:27,045] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:853] [PID:5467] converting PEFT model w/ prepare_model_for_kbit_training
[2026-02-21 20:37:27,047] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:5467] Converting modules to torch.float32
[2026-02-21 20:37:27,049] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] Memory usage after model load 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:27,053] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/peft/tuners/tuners_utils.py:1225: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
warnings.warn(msg)
trainable params: 276,668,416 || all params: 770,701,184 || trainable%: 35.8983
[2026-02-21 20:37:27,656] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] after adapters 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:33,551] [INFO] [axolotl.train.save_initial_configs:413] [PID:5467] Pre-saving adapter config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,553] [INFO] [axolotl.train.save_initial_configs:417] [PID:5467] Pre-saving tokenizer to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,733] [INFO] [axolotl.train.save_initial_configs:422] [PID:5467] Pre-saving model config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,737] [INFO] [axolotl.train.execute_training:212] [PID:5467] Starting trainer...
[2026-02-21 20:37:33,737] [WARNING] [transformers.trainer._align_special_tokens:982] [PID:5467] The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
0%| | 0/49 [00:00<?, ?it/s][2026-02-21 20:37:34,099] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
super().__init__(loader)
2%|β | 1/49 [00:49<39:37, 49.52s/it]
4%|β | 2/49 [01:38<38:45, 49.48s/it]
6%|β | 3/49 [02:39<41:49, 54.56s/it]
8%|β | 4/49 [03:19<36:40, 48.90s/it]
10%|β | 5/49 [04:06<35:15, 48.07s/it]
12%|ββ | 6/49 [04:46<32:31, 45.39s/it]
14%|ββ | 7/49 [05:33<32:04, 45.82s/it]
16%|ββ | 8/49 [06:19<31:27, 46.03s/it]
18%|ββ | 9/49 [06:59<29:27, 44.20s/it]
20%|ββ | 10/49 [07:39<27:45, 42.71s/it]
{'loss': 2.1003, 'grad_norm': 29.422929763793945, 'learning_rate': 3.6e-05, 'ppl': 8.16862, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.3365485370159149, 'tokens/total': 40960, 'tokens/trainable': 732, 'epoch': 0.41}
20%|ββ | 10/49 [07:43<27:45, 42.71s/it]
22%|βββ | 11/49 [08:20<26:48, 42.33s/it]
24%|βββ | 12/49 [09:11<27:37, 44.80s/it]
27%|βββ | 13/49 [09:56<27:02, 45.07s/it]
29%|βββ | 14/49 [10:45<26:55, 46.15s/it]
31%|βββ | 15/49 [11:29<25:49, 45.58s/it][2026-02-21 20:49:03,976] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-15
33%|ββββ | 16/49 [12:34<28:15, 51.37s/it]
35%|ββββ | 17/49 [13:17<26:04, 48.90s/it]
37%|ββββ | 18/49 [14:01<24:26, 47.32s/it]
39%|ββββ | 19/49 [14:52<24:09, 48.31s/it]
41%|ββββ | 20/49 [15:42<23:36, 48.83s/it]
{'loss': 1.3313, 'grad_norm': 20.819232940673828, 'learning_rate': 7.6e-05, 'ppl': 3.78596, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.13703587651252747, 'tokens/total': 81920, 'tokens/trainable': 1375, 'epoch': 0.82}
41%|ββββ | 20/49 [15:47<23:36, 48.83s/it]
43%|βββββ | 21/49 [16:32<22:58, 49.25s/it]
45%|βββββ | 22/49 [17:12<20:53, 46.42s/it]
47%|βββββ | 23/49 [17:55<19:46, 45.63s/it]
49%|βββββ | 24/49 [18:36<18:24, 44.19s/it]
51%|βββββ | 25/49 [19:00<15:11, 37.96s/it][2026-02-21 20:56:34,355] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
super().__init__(loader)
53%|ββββββ | 26/49 [19:50<16:01, 41.81s/it]
55%|ββββββ | 27/49 [20:32<15:20, 41.84s/it]
57%|ββββββ | 28/49 [21:19<15:08, 43.24s/it]
59%|ββββββ | 29/49 [22:10<15:13, 45.67s/it]
61%|ββββββ | 30/49 [23:16<16:22, 51.72s/it]
{'loss': 0.8107, 'grad_norm': 10.385747909545898, 'learning_rate': 0.000116, 'ppl': 2.24948, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.2631860375404358, 'tokens/total': 119808, 'tokens/trainable': 1990, 'epoch': 1.21}
61%|ββββββ | 30/49 [23:22<16:22, 51.72s/it][2026-02-21 21:00:56,360] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-30
63%|βββββββ | 31/49 [24:24<16:58, 56.56s/it]
65%|βββββββ | 32/49 [25:20<16:00, 56.51s/it]
67%|βββββββ | 33/49 [26:27<15:51, 59.48s/it]
69%|βββββββ | 34/49 [27:21<14:26, 57.79s/it]
71%|ββββββββ | 35/49 [28:10<12:52, 55.20s/it]
73%|ββββββββ | 36/49 [28:50<11:00, 50.77s/it]
76%|ββββββββ | 37/49 [29:32<09:37, 48.10s/it]
78%|ββββββββ | 38/49 [30:19<08:46, 47.83s/it]
80%|ββββββββ | 39/49 [31:07<07:57, 47.79s/it]
82%|βββββββββ | 40/49 [31:54<07:07, 47.45s/it]
{'loss': 0.4243, 'grad_norm': 9.247703552246094, 'learning_rate': 0.00015600000000000002, 'ppl': 1.52852, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.12217021733522415, 'tokens/total': 160768, 'tokens/trainable': 2671, 'epoch': 1.62}
82%|βββββββββ | 40/49 [31:59<07:07, 47.45s/it]
84%|βββββββββ | 41/49 [33:01<07:06, 53.31s/it]
86%|βββββββββ | 42/49 [33:56<06:17, 53.91s/it]
88%|βββββββββ | 43/49 [34:57<05:35, 55.99s/it]
90%|βββββββββ | 44/49 [35:49<04:34, 54.85s/it]
92%|ββββββββββ| 45/49 [36:49<03:45, 56.49s/it][2026-02-21 21:14:23,862] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-45
94%|ββββββββββ| 46/49 [38:04<03:05, 61.88s/it]
96%|ββββββββββ| 47/49 [38:53<01:56, 58.24s/it]
98%|ββββββββββ| 48/49 [39:49<00:57, 57.40s/it]
100%|ββββββββββ| 49/49 [40:29<00:00, 52.10s/it][2026-02-21 21:18:03,251] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-49
{'train_runtime': 2450.0653, 'train_samples_per_second': 0.16, 'train_steps_per_second': 0.02, 'train_loss': 1.0411046378466549, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'epoch': 1.99, 'tokens/train_per_sec_per_gpu': 0.19685673713684082}
100%|ββββββββββ| 49/49 [40:50<00:00, 52.10s/it]
100%|ββββββββββ| 49/49 [40:50<00:00, 50.00s/it]
[2026-02-21 21:18:29,199] [INFO] [axolotl.train.save_trained_model:233] [PID:5467] Training completed! Saving trained model to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output.
[2026-02-21 21:18:31,934] [INFO] [axolotl.train.save_trained_model:351] [PID:5467] Model successfully saved to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output
|