zsh-cli-lora / debug.log
duoyuncloud's picture
Upload LoRA adapter for CLI completion
dd11053 verified
[2026-02-21 20:37:21,574] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:5467] baseline 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:21,574] [INFO] [axolotl.cli.config.load_cfg:259] [PID:5467] config:
{
"activation_offloading": false,
"adapter": "lora",
"axolotl_config_path": "src/training/qwen3_axolotl_config.yml",
"base_model": "Qwen/Qwen2-0.5B",
"base_model_config": "Qwen/Qwen2-0.5B",
"batch_size": 8,
"bf16": false,
"capabilities": {
"bf16": true,
"fp8": false,
"n_gpu": 1,
"n_node": 1
},
"context_parallel_size": 1,
"dataloader_num_workers": 1,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 256,
"dataset_num_proc": 8,
"dataset_prepared_path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared",
"datasets": [
{
"message_property_mappings": {
"content": "content",
"role": "role"
},
"path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/src/training/data_splits_axolotl/train_axolotl.jsonl",
"trust_remote_code": false,
"type": "alpaca"
}
],
"ddp": false,
"device": "mps",
"dion_rank_fraction": 1.0,
"dion_rank_multiple_of": 1,
"env_capabilities": {
"torch_version": "2.10.0"
},
"eval_batch_size": 1,
"eval_causal_lm_metrics": [
"sacrebleu",
"comet",
"ter",
"chrf"
],
"eval_max_new_tokens": 128,
"eval_table_size": 0,
"experimental_skip_move_to_device": true,
"fp16": false,
"fp8": false,
"gradient_accumulation_steps": 8,
"gradient_checkpointing": true,
"gradient_checkpointing_kwargs": {
"use_reentrant": true
},
"group_by_length": false,
"include_tkps": true,
"is_falcon_derived_model": false,
"is_llama_derived_model": false,
"is_mistral_derived_model": false,
"learning_rate": 0.0002,
"lisa_layers_attribute": "model.layers",
"load_best_model_at_end": false,
"load_in_4bit": true,
"load_in_8bit": false,
"local_rank": 0,
"logging_steps": 10,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_modules_to_save": [
"embed_tokens",
"lm_head"
],
"lora_r": 8,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"loraplus_lr_embedding": 1e-06,
"lr_scheduler": "cosine",
"mean_resizing_embeddings": false,
"micro_batch_size": 1,
"model_config_type": "qwen2",
"num_epochs": 2.0,
"optimizer": "adamw_torch",
"otel_metrics_host": "localhost",
"otel_metrics_port": 8000,
"output_dir": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output",
"pad_to_sequence_len": true,
"pretrain_multipack_attn": true,
"profiler_steps_start": 0,
"qlora_sharded_model_loading": false,
"ray_num_workers": 1,
"resources_per_worker": {
"GPU": 1
},
"sample_packing": false,
"sample_packing_bin_size": 200,
"sample_packing_group_size": 100000,
"save_only_model": false,
"save_safetensors": true,
"save_steps": 15,
"save_total_limit": 2,
"sequence_len": 512,
"shuffle_before_merging_datasets": false,
"shuffle_merged_datasets": true,
"skip_prepare_dataset": false,
"streaming_multipack_buffer_size": 10000,
"strict": false,
"tensor_parallel_size": 1,
"tf32": false,
"tiled_mlp_use_original_mlp": true,
"tokenizer_config": "Qwen/Qwen2-0.5B",
"tokenizer_save_jinja_files": true,
"tokenizer_type": "AutoTokenizer",
"torch_dtype": "torch.float32",
"train_on_inputs": false,
"trl": {
"log_completions": false,
"mask_truncated_completions": false,
"ref_model_mixup_alpha": 0.9,
"ref_model_sync_steps": 64,
"scale_rewards": true,
"sync_ref_model": false,
"use_vllm": false,
"vllm_server_host": "0.0.0.0",
"vllm_server_port": 8000
},
"trust_remote_code": true,
"type_of_model": "AutoModelForCausalLM",
"use_otel_metrics": false,
"use_ray": false,
"val_set_size": 0.0,
"vllm": {
"device": "auto",
"dtype": "auto",
"gpu_memory_utilization": 0.9,
"host": "0.0.0.0",
"port": 8000
},
"wandb_mode": "disabled",
"warmup_steps": 50,
"weight_decay": 0.0,
"world_size": 1
}
[2026-02-21 20:37:22,676] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <|endoftext|>
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <|endoftext|>
[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
[2026-02-21 20:37:22,679] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:5467] Loading prepared dataset from disk at /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared/41f31c3c9bc9eb4eb6e943fbbbb74dda...
[2026-02-21 20:37:22,701] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:5467] total_num_tokens: 12_122
[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:5467] `total_supervised_tokens: 1_660`
[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:5467] total_num_steps: 49
[2026-02-21 20:37:22,703] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:5467] Maximum number of steps set at 49
[2026-02-21 20:37:22,735] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:5467] loading tokenizer... Qwen/Qwen2-0.5B
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <|endoftext|>
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <|endoftext|>
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
[2026-02-21 20:37:23,594] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:5467] Loading model
[2026-02-21 20:37:23,742] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:5467] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-02-21 20:37:23,744] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:5467] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
[2026-02-21 20:37:23,752] [WARNING] [transformers.modeling_utils.warning_once:328] [PID:5467] `torch_dtype` is deprecated! Use `dtype` instead!
[2026-02-21 20:37:27,045] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:853] [PID:5467] converting PEFT model w/ prepare_model_for_kbit_training
[2026-02-21 20:37:27,047] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:5467] Converting modules to torch.float32
[2026-02-21 20:37:27,049] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] Memory usage after model load 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:27,053] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/peft/tuners/tuners_utils.py:1225: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
warnings.warn(msg)
trainable params: 276,668,416 || all params: 770,701,184 || trainable%: 35.8983
[2026-02-21 20:37:27,656] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] after adapters 0.000GB (+0.000GB allocated)
[2026-02-21 20:37:33,551] [INFO] [axolotl.train.save_initial_configs:413] [PID:5467] Pre-saving adapter config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,553] [INFO] [axolotl.train.save_initial_configs:417] [PID:5467] Pre-saving tokenizer to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,733] [INFO] [axolotl.train.save_initial_configs:422] [PID:5467] Pre-saving model config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
[2026-02-21 20:37:33,737] [INFO] [axolotl.train.execute_training:212] [PID:5467] Starting trainer...
[2026-02-21 20:37:33,737] [WARNING] [transformers.trainer._align_special_tokens:982] [PID:5467] The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
0%| | 0/49 [00:00<?, ?it/s][2026-02-21 20:37:34,099] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
super().__init__(loader)
2%|▏ | 1/49 [00:49<39:37, 49.52s/it] 4%|▍ | 2/49 [01:38<38:45, 49.48s/it] 6%|β–Œ | 3/49 [02:39<41:49, 54.56s/it] 8%|β–Š | 4/49 [03:19<36:40, 48.90s/it] 10%|β–ˆ | 5/49 [04:06<35:15, 48.07s/it] 12%|β–ˆβ– | 6/49 [04:46<32:31, 45.39s/it] 14%|β–ˆβ– | 7/49 [05:33<32:04, 45.82s/it] 16%|β–ˆβ–‹ | 8/49 [06:19<31:27, 46.03s/it] 18%|β–ˆβ–Š | 9/49 [06:59<29:27, 44.20s/it] 20%|β–ˆβ–ˆ | 10/49 [07:39<27:45, 42.71s/it] {'loss': 2.1003, 'grad_norm': 29.422929763793945, 'learning_rate': 3.6e-05, 'ppl': 8.16862, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.3365485370159149, 'tokens/total': 40960, 'tokens/trainable': 732, 'epoch': 0.41}
20%|β–ˆβ–ˆ | 10/49 [07:43<27:45, 42.71s/it] 22%|β–ˆβ–ˆβ– | 11/49 [08:20<26:48, 42.33s/it] 24%|β–ˆβ–ˆβ– | 12/49 [09:11<27:37, 44.80s/it] 27%|β–ˆβ–ˆβ–‹ | 13/49 [09:56<27:02, 45.07s/it] 29%|β–ˆβ–ˆβ–Š | 14/49 [10:45<26:55, 46.15s/it] 31%|β–ˆβ–ˆβ–ˆ | 15/49 [11:29<25:49, 45.58s/it][2026-02-21 20:49:03,976] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-15
33%|β–ˆβ–ˆβ–ˆβ–Ž | 16/49 [12:34<28:15, 51.37s/it] 35%|β–ˆβ–ˆβ–ˆβ– | 17/49 [13:17<26:04, 48.90s/it] 37%|β–ˆβ–ˆβ–ˆβ–‹ | 18/49 [14:01<24:26, 47.32s/it] 39%|β–ˆβ–ˆβ–ˆβ–‰ | 19/49 [14:52<24:09, 48.31s/it] 41%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/49 [15:42<23:36, 48.83s/it] {'loss': 1.3313, 'grad_norm': 20.819232940673828, 'learning_rate': 7.6e-05, 'ppl': 3.78596, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.13703587651252747, 'tokens/total': 81920, 'tokens/trainable': 1375, 'epoch': 0.82}
41%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/49 [15:47<23:36, 48.83s/it] 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 21/49 [16:32<22:58, 49.25s/it] 45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/49 [17:12<20:53, 46.42s/it] 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 23/49 [17:55<19:46, 45.63s/it] 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 24/49 [18:36<18:24, 44.19s/it] 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/49 [19:00<15:11, 37.96s/it][2026-02-21 20:56:34,355] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
super().__init__(loader)
53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 26/49 [19:50<16:01, 41.81s/it] 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 27/49 [20:32<15:20, 41.84s/it] 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 28/49 [21:19<15:08, 43.24s/it] 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 29/49 [22:10<15:13, 45.67s/it] 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/49 [23:16<16:22, 51.72s/it] {'loss': 0.8107, 'grad_norm': 10.385747909545898, 'learning_rate': 0.000116, 'ppl': 2.24948, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.2631860375404358, 'tokens/total': 119808, 'tokens/trainable': 1990, 'epoch': 1.21}
61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/49 [23:22<16:22, 51.72s/it][2026-02-21 21:00:56,360] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-30
63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 31/49 [24:24<16:58, 56.56s/it] 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 32/49 [25:20<16:00, 56.51s/it] 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 33/49 [26:27<15:51, 59.48s/it] 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 34/49 [27:21<14:26, 57.79s/it] 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 35/49 [28:10<12:52, 55.20s/it] 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 36/49 [28:50<11:00, 50.77s/it] 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 37/49 [29:32<09:37, 48.10s/it] 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 38/49 [30:19<08:46, 47.83s/it] 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 39/49 [31:07<07:57, 47.79s/it] 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 40/49 [31:54<07:07, 47.45s/it] {'loss': 0.4243, 'grad_norm': 9.247703552246094, 'learning_rate': 0.00015600000000000002, 'ppl': 1.52852, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.12217021733522415, 'tokens/total': 160768, 'tokens/trainable': 2671, 'epoch': 1.62}
82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 40/49 [31:59<07:07, 47.45s/it] 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 41/49 [33:01<07:06, 53.31s/it] 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 42/49 [33:56<06:17, 53.91s/it] 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 43/49 [34:57<05:35, 55.99s/it] 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 44/49 [35:49<04:34, 54.85s/it] 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 45/49 [36:49<03:45, 56.49s/it][2026-02-21 21:14:23,862] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-45
94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/49 [38:04<03:05, 61.88s/it] 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 47/49 [38:53<01:56, 58.24s/it] 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 48/49 [39:49<00:57, 57.40s/it] 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 49/49 [40:29<00:00, 52.10s/it][2026-02-21 21:18:03,251] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-49
{'train_runtime': 2450.0653, 'train_samples_per_second': 0.16, 'train_steps_per_second': 0.02, 'train_loss': 1.0411046378466549, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'epoch': 1.99, 'tokens/train_per_sec_per_gpu': 0.19685673713684082}
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 49/49 [40:50<00:00, 52.10s/it] 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 49/49 [40:50<00:00, 50.00s/it]
[2026-02-21 21:18:29,199] [INFO] [axolotl.train.save_trained_model:233] [PID:5467] Training completed! Saving trained model to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output.
[2026-02-21 21:18:31,934] [INFO] [axolotl.train.save_trained_model:351] [PID:5467] Model successfully saved to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output