Upload LoRA adapter for CLI completion

dd11053 verified 14 days ago

15.8 kB

	[2026-02-21 20:37:21,574] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:5467] baseline 0.000GB (+0.000GB allocated)
	[2026-02-21 20:37:21,574] [INFO] [axolotl.cli.config.load_cfg:259] [PID:5467] config:
	{
	"activation_offloading": false,
	"adapter": "lora",
	"axolotl_config_path": "src/training/qwen3_axolotl_config.yml",
	"base_model": "Qwen/Qwen2-0.5B",
	"base_model_config": "Qwen/Qwen2-0.5B",
	"batch_size": 8,
	"bf16": false,
	"capabilities": {
	"bf16": true,
	"fp8": false,
	"n_gpu": 1,
	"n_node": 1
	},
	"context_parallel_size": 1,
	"dataloader_num_workers": 1,
	"dataloader_pin_memory": true,
	"dataloader_prefetch_factor": 256,
	"dataset_num_proc": 8,
	"dataset_prepared_path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared",
	"datasets": [
	{
	"message_property_mappings": {
	"content": "content",
	"role": "role"
	},
	"path": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/src/training/data_splits_axolotl/train_axolotl.jsonl",
	"trust_remote_code": false,
	"type": "alpaca"
	}
	],
	"ddp": false,
	"device": "mps",
	"dion_rank_fraction": 1.0,
	"dion_rank_multiple_of": 1,
	"env_capabilities": {
	"torch_version": "2.10.0"
	},
	"eval_batch_size": 1,
	"eval_causal_lm_metrics": [
	"sacrebleu",
	"comet",
	"ter",
	"chrf"
	],
	"eval_max_new_tokens": 128,
	"eval_table_size": 0,
	"experimental_skip_move_to_device": true,
	"fp16": false,
	"fp8": false,
	"gradient_accumulation_steps": 8,
	"gradient_checkpointing": true,
	"gradient_checkpointing_kwargs": {
	"use_reentrant": true
	},
	"group_by_length": false,
	"include_tkps": true,
	"is_falcon_derived_model": false,
	"is_llama_derived_model": false,
	"is_mistral_derived_model": false,
	"learning_rate": 0.0002,
	"lisa_layers_attribute": "model.layers",
	"load_best_model_at_end": false,
	"load_in_4bit": true,
	"load_in_8bit": false,
	"local_rank": 0,
	"logging_steps": 10,
	"lora_alpha": 16,
	"lora_dropout": 0.05,
	"lora_modules_to_save": [
	"embed_tokens",
	"lm_head"
	],
	"lora_r": 8,
	"lora_target_modules": [
	"q_proj",
	"k_proj",
	"v_proj",
	"o_proj",
	"gate_proj",
	"up_proj",
	"down_proj"
	],
	"loraplus_lr_embedding": 1e-06,
	"lr_scheduler": "cosine",
	"mean_resizing_embeddings": false,
	"micro_batch_size": 1,
	"model_config_type": "qwen2",
	"num_epochs": 2.0,
	"optimizer": "adamw_torch",
	"otel_metrics_host": "localhost",
	"otel_metrics_port": 8000,
	"output_dir": "/Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output",
	"pad_to_sequence_len": true,
	"pretrain_multipack_attn": true,
	"profiler_steps_start": 0,
	"qlora_sharded_model_loading": false,
	"ray_num_workers": 1,
	"resources_per_worker": {
	"GPU": 1
	},
	"sample_packing": false,
	"sample_packing_bin_size": 200,
	"sample_packing_group_size": 100000,
	"save_only_model": false,
	"save_safetensors": true,
	"save_steps": 15,
	"save_total_limit": 2,
	"sequence_len": 512,
	"shuffle_before_merging_datasets": false,
	"shuffle_merged_datasets": true,
	"skip_prepare_dataset": false,
	"streaming_multipack_buffer_size": 10000,
	"strict": false,
	"tensor_parallel_size": 1,
	"tf32": false,
	"tiled_mlp_use_original_mlp": true,
	"tokenizer_config": "Qwen/Qwen2-0.5B",
	"tokenizer_save_jinja_files": true,
	"tokenizer_type": "AutoTokenizer",
	"torch_dtype": "torch.float32",
	"train_on_inputs": false,
	"trl": {
	"log_completions": false,
	"mask_truncated_completions": false,
	"ref_model_mixup_alpha": 0.9,
	"ref_model_sync_steps": 64,
	"scale_rewards": true,
	"sync_ref_model": false,
	"use_vllm": false,
	"vllm_server_host": "0.0.0.0",
	"vllm_server_port": 8000
	},
	"trust_remote_code": true,
	"type_of_model": "AutoModelForCausalLM",
	"use_otel_metrics": false,
	"use_ray": false,
	"val_set_size": 0.0,
	"vllm": {
	"device": "auto",
	"dtype": "auto",
	"gpu_memory_utilization": 0.9,
	"host": "0.0.0.0",
	"port": 8000
	},
	"wandb_mode": "disabled",
	"warmup_steps": 50,
	"weight_decay": 0.0,
	"world_size": 1
	}
	[2026-02-21 20:37:22,676] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <\|endoftext\|>
	[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
	[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <\|endoftext\|>
	[2026-02-21 20:37:22,677] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
	[2026-02-21 20:37:22,679] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:5467] Loading prepared dataset from disk at /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/last_run_prepared/41f31c3c9bc9eb4eb6e943fbbbb74dda...
	[2026-02-21 20:37:22,701] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:5467] total_num_tokens: 12_122
	[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:5467] `total_supervised_tokens: 1_660`
	[2026-02-21 20:37:22,703] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:5467] total_num_steps: 49
	[2026-02-21 20:37:22,703] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:5467] Maximum number of steps set at 49
	[2026-02-21 20:37:22,735] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:5467] loading tokenizer... Qwen/Qwen2-0.5B
	[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:5467] EOS: 151643 / <\|endoftext\|>
	[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:5467] BOS: None / None
	[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:5467] PAD: 151643 / <\|endoftext\|>
	[2026-02-21 20:37:23,594] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:5467] UNK: None / None
	[2026-02-21 20:37:23,594] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:5467] Loading model
	[2026-02-21 20:37:23,742] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:5467] Patched Trainer.evaluation_loop with nanmean loss calculation
	[2026-02-21 20:37:23,744] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:5467] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
	[2026-02-21 20:37:23,752] [WARNING] [transformers.modeling_utils.warning_once:328] [PID:5467] `torch_dtype` is deprecated! Use `dtype` instead!
	[2026-02-21 20:37:27,045] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:853] [PID:5467] converting PEFT model w/ prepare_model_for_kbit_training
	[2026-02-21 20:37:27,047] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:5467] Converting modules to torch.float32
	[2026-02-21 20:37:27,049] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] Memory usage after model load 0.000GB (+0.000GB allocated)
	[2026-02-21 20:37:27,053] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/peft/tuners/tuners_utils.py:1225: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
	warnings.warn(msg)

	trainable params: 276,668,416 \|\| all params: 770,701,184 \|\| trainable%: 35.8983
	[2026-02-21 20:37:27,656] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:5467] after adapters 0.000GB (+0.000GB allocated)
	[2026-02-21 20:37:33,551] [INFO] [axolotl.train.save_initial_configs:413] [PID:5467] Pre-saving adapter config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
	[2026-02-21 20:37:33,553] [INFO] [axolotl.train.save_initial_configs:417] [PID:5467] Pre-saving tokenizer to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
	[2026-02-21 20:37:33,733] [INFO] [axolotl.train.save_initial_configs:422] [PID:5467] Pre-saving model config to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output...
	[2026-02-21 20:37:33,737] [INFO] [axolotl.train.execute_training:212] [PID:5467] Starting trainer...
	[2026-02-21 20:37:33,737] [WARNING] [transformers.trainer._align_special_tokens:982] [PID:5467] The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
	0%\| \| 0/49 [00:00<?, ?it/s][2026-02-21 20:37:34,099] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
	super().__init__(loader)

	2%\|▏ \| 1/49 [00:49<39:37, 49.52s/it] 4%\|▍ \| 2/49 [01:38<38:45, 49.48s/it] 6%\|▌ \| 3/49 [02:39<41:49, 54.56s/it] 8%\|▊ \| 4/49 [03:19<36:40, 48.90s/it] 10%\|█ \| 5/49 [04:06<35:15, 48.07s/it] 12%\|█▏ \| 6/49 [04:46<32:31, 45.39s/it] 14%\|█▍ \| 7/49 [05:33<32:04, 45.82s/it] 16%\|█▋ \| 8/49 [06:19<31:27, 46.03s/it] 18%\|█▊ \| 9/49 [06:59<29:27, 44.20s/it] 20%\|██ \| 10/49 [07:39<27:45, 42.71s/it] {'loss': 2.1003, 'grad_norm': 29.422929763793945, 'learning_rate': 3.6e-05, 'ppl': 8.16862, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.3365485370159149, 'tokens/total': 40960, 'tokens/trainable': 732, 'epoch': 0.41}
	20%\|██ \| 10/49 [07:43<27:45, 42.71s/it] 22%\|██▏ \| 11/49 [08:20<26:48, 42.33s/it] 24%\|██▍ \| 12/49 [09:11<27:37, 44.80s/it] 27%\|██▋ \| 13/49 [09:56<27:02, 45.07s/it] 29%\|██▊ \| 14/49 [10:45<26:55, 46.15s/it] 31%\|███ \| 15/49 [11:29<25:49, 45.58s/it][2026-02-21 20:49:03,976] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-15
	33%\|███▎ \| 16/49 [12:34<28:15, 51.37s/it] 35%\|███▍ \| 17/49 [13:17<26:04, 48.90s/it] 37%\|███▋ \| 18/49 [14:01<24:26, 47.32s/it] 39%\|███▉ \| 19/49 [14:52<24:09, 48.31s/it] 41%\|████ \| 20/49 [15:42<23:36, 48.83s/it] {'loss': 1.3313, 'grad_norm': 20.819232940673828, 'learning_rate': 7.6e-05, 'ppl': 3.78596, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.13703587651252747, 'tokens/total': 81920, 'tokens/trainable': 1375, 'epoch': 0.82}
	41%\|████ \| 20/49 [15:47<23:36, 48.83s/it] 43%\|████▎ \| 21/49 [16:32<22:58, 49.25s/it] 45%\|████▍ \| 22/49 [17:12<20:53, 46.42s/it] 47%\|████▋ \| 23/49 [17:55<19:46, 45.63s/it] 49%\|████▉ \| 24/49 [18:36<18:24, 44.19s/it] 51%\|█████ \| 25/49 [19:00<15:11, 37.96s/it][2026-02-21 20:56:34,355] [WARNING] [py.warnings._showwarnmsg:110] [PID:5467] /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/venv/lib/python3.13/site-packages/torch/utils/data/dataloader.py:1118: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
	super().__init__(loader)

	53%\|█████▎ \| 26/49 [19:50<16:01, 41.81s/it] 55%\|█████▌ \| 27/49 [20:32<15:20, 41.84s/it] 57%\|█████▋ \| 28/49 [21:19<15:08, 43.24s/it] 59%\|█████▉ \| 29/49 [22:10<15:13, 45.67s/it] 61%\|██████ \| 30/49 [23:16<16:22, 51.72s/it] {'loss': 0.8107, 'grad_norm': 10.385747909545898, 'learning_rate': 0.000116, 'ppl': 2.24948, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.2631860375404358, 'tokens/total': 119808, 'tokens/trainable': 1990, 'epoch': 1.21}
	61%\|██████ \| 30/49 [23:22<16:22, 51.72s/it][2026-02-21 21:00:56,360] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-30
	63%\|██████▎ \| 31/49 [24:24<16:58, 56.56s/it] 65%\|██████▌ \| 32/49 [25:20<16:00, 56.51s/it] 67%\|██████▋ \| 33/49 [26:27<15:51, 59.48s/it] 69%\|██████▉ \| 34/49 [27:21<14:26, 57.79s/it] 71%\|███████▏ \| 35/49 [28:10<12:52, 55.20s/it] 73%\|███████▎ \| 36/49 [28:50<11:00, 50.77s/it] 76%\|███████▌ \| 37/49 [29:32<09:37, 48.10s/it] 78%\|███████▊ \| 38/49 [30:19<08:46, 47.83s/it] 80%\|███████▉ \| 39/49 [31:07<07:57, 47.79s/it] 82%\|████████▏ \| 40/49 [31:54<07:07, 47.45s/it] {'loss': 0.4243, 'grad_norm': 9.247703552246094, 'learning_rate': 0.00015600000000000002, 'ppl': 1.52852, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'tokens/train_per_sec_per_gpu': 0.12217021733522415, 'tokens/total': 160768, 'tokens/trainable': 2671, 'epoch': 1.62}
	82%\|████████▏ \| 40/49 [31:59<07:07, 47.45s/it] 84%\|████████▎ \| 41/49 [33:01<07:06, 53.31s/it] 86%\|████████▌ \| 42/49 [33:56<06:17, 53.91s/it] 88%\|████████▊ \| 43/49 [34:57<05:35, 55.99s/it] 90%\|████████▉ \| 44/49 [35:49<04:34, 54.85s/it] 92%\|█████████▏\| 45/49 [36:49<03:45, 56.49s/it][2026-02-21 21:14:23,862] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-45
	94%\|█████████▍\| 46/49 [38:04<03:05, 61.88s/it] 96%\|█████████▌\| 47/49 [38:53<01:56, 58.24s/it] 98%\|█████████▊\| 48/49 [39:49<00:57, 57.40s/it] 100%\|██████████\| 49/49 [40:29<00:00, 52.10s/it][2026-02-21 21:18:03,251] [INFO] [axolotl.core.trainers.base._save:721] [PID:5467] Saving model checkpoint to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output/checkpoint-49
	{'train_runtime': 2450.0653, 'train_samples_per_second': 0.16, 'train_steps_per_second': 0.02, 'train_loss': 1.0411046378466549, 'memory/max_active (GiB)': 4.94, 'memory/max_allocated (GiB)': 8.44, 'memory/device_reserved (GiB)': 0, 'epoch': 1.99, 'tokens/train_per_sec_per_gpu': 0.19685673713684082}
	100%\|██████████\| 49/49 [40:50<00:00, 52.10s/it] 100%\|██████████\| 49/49 [40:50<00:00, 50.00s/it]
	[2026-02-21 21:18:29,199] [INFO] [axolotl.train.save_trained_model:233] [PID:5467] Training completed! Saving trained model to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output.
	[2026-02-21 21:18:31,934] [INFO] [axolotl.train.save_trained_model:351] [PID:5467] Model successfully saved to /Users/duoyun/Desktop/zsh-llm-cli-autocomplete-tool/zsh-lora-output