File size: 99,476 Bytes
ea4efc0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
[2025-10-22 12:55:24,619] [DEBUG] [axolotl.utils.config.resolve_dtype:66] [PID:2418] bf16 support detected, enabling for this configuration.
config.json: 0%| | 0.00/663 [00:00<?, ?B/s]
config.json: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 663/663 [00:00<00:00, 7.86MB/s]
[2025-10-22 12:55:24,766] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:2418] baseline 0.000GB ()
[2025-10-22 12:55:24,766] [INFO] [axolotl.cli.config.load_cfg:248] [PID:2418] config:
{
"activation_offloading": false,
"adapter": "lora",
"axolotl_config_path": "config.yaml",
"base_model": "Qwen/Qwen2.5-7B-Instruct",
"base_model_config": "Qwen/Qwen2.5-7B-Instruct",
"batch_size": 16,
"bf16": true,
"capabilities": {
"bf16": true,
"compute_capability": "sm_90",
"fp8": false,
"n_gpu": 1,
"n_node": 1
},
"context_parallel_size": 1,
"dataloader_num_workers": 1,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 256,
"dataset_processes": 36,
"datasets": [
{
"message_property_mappings": {
"content": "content",
"role": "role"
},
"path": "/workspace/fine-tuning/data/data.json",
"trust_remote_code": false,
"type": "alpaca"
}
],
"ddp": false,
"device": "cuda:0",
"dion_rank_fraction": 1.0,
"dion_rank_multiple_of": 1,
"env_capabilities": {
"torch_version": "2.7.1"
},
"eval_batch_size": 4,
"eval_causal_lm_metrics": [
"sacrebleu",
"comet",
"ter",
"chrf"
],
"eval_max_new_tokens": 128,
"eval_table_size": 0,
"experimental_skip_move_to_device": true,
"flash_attention": true,
"fp16": false,
"gradient_accumulation_steps": 4,
"gradient_checkpointing": true,
"gradient_checkpointing_kwargs": {
"use_reentrant": true
},
"include_tkps": true,
"learning_rate": 0.0002,
"lisa_layers_attribute": "model.layers",
"load_best_model_at_end": false,
"load_in_4bit": false,
"load_in_8bit": true,
"local_rank": 0,
"logging_steps": 10,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_r": 8,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"down_proj",
"up_proj"
],
"loraplus_lr_embedding": 1e-06,
"lr_scheduler": "cosine",
"mean_resizing_embeddings": false,
"micro_batch_size": 4,
"model_config_type": "qwen2",
"num_epochs": 3.0,
"optimizer": "adamw_bnb_8bit",
"output_dir": "/workspace/fine-tuning/output",
"pretrain_multipack_attn": true,
"profiler_steps_start": 0,
"qlora_sharded_model_loading": false,
"ray_num_workers": 1,
"resources_per_worker": {
"GPU": 1
},
"sample_packing_bin_size": 200,
"sample_packing_group_size": 100000,
"save_only_model": false,
"save_safetensors": true,
"save_steps": 200,
"save_total_limit": 2,
"sequence_len": 4096,
"shuffle_before_merging_datasets": false,
"shuffle_merged_datasets": true,
"skip_prepare_dataset": false,
"streaming_multipack_buffer_size": 10000,
"strict": false,
"tensor_parallel_size": 1,
"tiled_mlp_use_original_mlp": true,
"tokenizer_config": "Qwen/Qwen2.5-7B-Instruct",
"tokenizer_save_jinja_files": true,
"torch_dtype": "torch.bfloat16",
"train_on_inputs": false,
"trl": {
"log_completions": false,
"mask_truncated_completions": false,
"ref_model_mixup_alpha": 0.9,
"ref_model_sync_steps": 64,
"scale_rewards": true,
"sync_ref_model": false,
"use_vllm": false,
"vllm_server_host": "0.0.0.0",
"vllm_server_port": 8000
},
"use_ray": false,
"val_set_size": 0.0,
"vllm": {
"device": "auto",
"dtype": "auto",
"gpu_memory_utilization": 0.9,
"host": "0.0.0.0",
"port": 8000
},
"weight_decay": 0.0,
"world_size": 1
}
tokenizer_config.json: 0.00B [00:00, ?B/s]
tokenizer_config.json: 7.30kB [00:00, 45.7MB/s]
vocab.json: 0.00B [00:00, ?B/s]
vocab.json: 2.78MB [00:00, 112MB/s]
merges.txt: 0.00B [00:00, ?B/s]
merges.txt: 1.67MB [00:00, 129MB/s]
tokenizer.json: 0.00B [00:00, ?B/s]
tokenizer.json: 7.03MB [00:00, 109MB/s]
[2025-10-22 12:55:25,791] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:2418] EOS: 151645 / <|im_end|>
[2025-10-22 12:55:25,792] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:2418] BOS: None / None
[2025-10-22 12:55:25,792] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:2418] PAD: 151643 / <|endoftext|>
[2025-10-22 12:55:25,792] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:2418] UNK: None / None
[2025-10-22 12:55:25,792] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:476] [PID:2418] Unable to find prepared dataset in last_run_prepared/a99a12059c50ab085817560a37dbde6c
[2025-10-22 12:55:25,792] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:2418] Loading raw datasets...
[2025-10-22 12:55:25,792] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:2418] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`.
Generating train split: 0 examples [00:00, ? examples/s]
Generating train split: 333 examples [00:00, 21071.50 examples/s]
[2025-10-22 12:55:25,923] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:2418] Loading dataset: /workspace/fine-tuning/data/data.json with base_type: alpaca and prompt_style: None
Tokenizing Prompts (num_proc=36): 0%| | 0/333 [00:00<?, ? examples/s]
Tokenizing Prompts (num_proc=36): 3%|βββ | 10/333 [00:01<00:34, 9.43 examples/s]
Tokenizing Prompts (num_proc=36): 6%|βββββ | 20/333 [00:01<00:15, 20.05 examples/s]
Tokenizing Prompts (num_proc=36): 9%|βββββββ | 30/333 [00:01<00:09, 31.09 examples/s]
Tokenizing Prompts (num_proc=36): 15%|βββββββββββ | 50/333 [00:01<00:04, 59.04 examples/s]
Tokenizing Prompts (num_proc=36): 21%|ββββββββββββββββ | 70/333 [00:01<00:03, 77.51 examples/s]
Tokenizing Prompts (num_proc=36): 27%|ββββββββββββββββββββ | 90/333 [00:01<00:02, 85.94 examples/s]
Tokenizing Prompts (num_proc=36): 32%|ββββββββββββββββββββββββ | 108/333 [00:01<00:02, 86.61 examples/s]
Tokenizing Prompts (num_proc=36): 38%|ββββββββββββββββββββββββββββ | 126/333 [00:02<00:02, 92.64 examples/s]
Tokenizing Prompts (num_proc=36): 43%|ββββββββββββββββββββββββββββββββ | 144/333 [00:02<00:01, 102.80 examples/s]
Tokenizing Prompts (num_proc=36): 49%|ββββββββββββββββββββββββββββββββββββ | 162/333 [00:02<00:01, 96.29 examples/s]
Tokenizing Prompts (num_proc=36): 54%|ββββββββββββββββββββββββββββββββββββββββ | 180/333 [00:02<00:01, 91.12 examples/s]
Tokenizing Prompts (num_proc=36): 65%|βββββββββββββββββββββββββββββββββββββββββββββββ | 216/333 [00:02<00:00, 117.53 examples/s]
Tokenizing Prompts (num_proc=36): 70%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 234/333 [00:03<00:00, 107.77 examples/s]
Tokenizing Prompts (num_proc=36): 76%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 252/333 [00:03<00:00, 113.43 examples/s]
Tokenizing Prompts (num_proc=36): 84%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 279/333 [00:03<00:00, 134.23 examples/s]
Tokenizing Prompts (num_proc=36): 89%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 297/333 [00:03<00:00, 114.85 examples/s]
Tokenizing Prompts (num_proc=36): 97%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 324/333 [00:03<00:00, 132.86 examples/s]
Tokenizing Prompts (num_proc=36): 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 333/333 [00:04<00:00, 80.47 examples/s]
[2025-10-22 12:55:30,239] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:218] [PID:2418] min_input_len: 36
[2025-10-22 12:55:30,239] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:220] [PID:2418] max_input_len: 1350
Dropping Long Sequences (>4096) (num_proc=36): 0%| | 0/333 [00:00<?, ? examples/s]
Dropping Long Sequences (>4096) (num_proc=36): 3%|ββ | 10/333 [00:00<00:27, 11.56 examples/s]
Dropping Long Sequences (>4096) (num_proc=36): 75%|βββββββββββββββββββββββββββββββββββββββββββββ | 251/333 [00:00<00:00, 345.84 examples/s]
Dropping Long Sequences (>4096) (num_proc=36): 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 333/333 [00:01<00:00, 273.41 examples/s]
Saving the dataset (0/1 shards): 0%| | 0/333 [00:00<?, ? examples/s]
Saving the dataset (1/1 shards): 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 333/333 [00:00<00:00, 29276.08 examples/s]
Saving the dataset (1/1 shards): 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 333/333 [00:00<00:00, 28696.24 examples/s]
[2025-10-22 12:55:31,719] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:404] [PID:2418] total_num_tokens: 94_264
[2025-10-22 12:55:31,721] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:422] [PID:2418] `total_supervised_tokens: 82_964`
[2025-10-22 12:55:31,721] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:520] [PID:2418] total_num_steps: 63
[2025-10-22 12:55:31,721] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:2418] Maximum number of steps set at 63
[2025-10-22 12:55:31,750] [DEBUG] [axolotl.train.setup_model_and_tokenizer:65] [PID:2418] Loading tokenizer... Qwen/Qwen2.5-7B-Instruct
[2025-10-22 12:55:32,146] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:2418] EOS: 151645 / <|im_end|>
[2025-10-22 12:55:32,147] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:2418] BOS: None / None
[2025-10-22 12:55:32,147] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:2418] PAD: 151643 / <|endoftext|>
[2025-10-22 12:55:32,147] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:2418] UNK: None / None
[2025-10-22 12:55:32,147] [DEBUG] [axolotl.train.setup_model_and_tokenizer:74] [PID:2418] Loading model
[2025-10-22 12:55:32,205] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:2418] Patched Trainer.evaluation_loop with nanmean loss calculation
[2025-10-22 12:55:32,206] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:2418] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
model.safetensors.index.json: 0.00B [00:00, ?B/s]
model.safetensors.index.json: 27.8kB [00:00, 193MB/s]
model-00001-of-00004.safetensors: 0%| | 0.00/3.95G [00:00<?, ?B/s]
model-00001-of-00004.safetensors: 0%| | 43.8k/3.95G [00:01<28:28:22, 38.5kB/s]
model-00001-of-00004.safetensors: 0%| | 1.96M/3.95G [00:01<32:15, 2.04MB/s]
model-00001-of-00004.safetensors: 0%|β | 9.56M/3.95G [00:01<05:51, 11.2MB/s]
model-00001-of-00004.safetensors: 0%|β | 15.5M/3.95G [00:01<03:33, 18.4MB/s]
model-00001-of-00004.safetensors: 1%|β | 23.7M/3.95G [00:01<02:16, 28.7MB/s]
model-00001-of-00004.safetensors: 1%|β | 32.9M/3.95G [00:01<01:36, 40.7MB/s]
model-00001-of-00004.safetensors: 1%|β | 43.8M/3.95G [00:01<01:21, 47.9MB/s]
model-00001-of-00004.safetensors: 1%|β | 53.3M/3.95G [00:02<01:08, 56.7MB/s]
model-00001-of-00004.safetensors: 2%|ββ | 60.5M/3.95G [00:02<01:05, 59.5MB/s]
model-00001-of-00004.safetensors: 2%|ββ | 69.4M/3.95G [00:02<01:09, 55.8MB/s]
model-00001-of-00004.safetensors: 2%|ββ | 88.0M/3.95G [00:02<00:54, 70.5MB/s]
model-00001-of-00004.safetensors: 3%|βββ | 120M/3.95G [00:02<00:41, 92.4MB/s]
model-00001-of-00004.safetensors: 4%|ββββ | 168M/3.95G [00:03<00:30, 123MB/s]
model-00001-of-00004.safetensors: 6%|βββββ | 235M/3.95G [00:03<00:19, 193MB/s]
model-00001-of-00004.safetensors: 8%|ββββββ | 303M/3.95G [00:03<00:21, 172MB/s]
model-00001-of-00004.safetensors: 9%|ββββββββ | 356M/3.95G [00:03<00:19, 184MB/s]
model-00001-of-00004.safetensors: 11%|βββββββββ | 423M/3.95G [00:04<00:16, 216MB/s]
model-00001-of-00004.safetensors: 11%|βββββββββ | 447M/3.95G [00:04<00:19, 180MB/s]
model-00001-of-00004.safetensors: 12%|ββββββββββ | 468M/3.95G [00:04<00:23, 151MB/s]
model-00001-of-00004.safetensors: 12%|ββββββββββ | 489M/3.95G [00:04<00:23, 148MB/s]
model-00001-of-00004.safetensors: 13%|ββββββββββ | 504M/3.95G [00:04<00:26, 131MB/s]
model-00001-of-00004.safetensors: 14%|βββββββββββ | 547M/3.95G [00:05<00:25, 134MB/s]
model-00001-of-00004.safetensors: 16%|βββββββββββββ | 634M/3.95G [00:05<00:15, 214MB/s]
model-00001-of-00004.safetensors: 18%|ββββββββββββββ | 701M/3.95G [00:05<00:12, 254MB/s]
model-00001-of-00004.safetensors: 20%|ββββββββββββββββ | 802M/3.95G [00:06<00:18, 167MB/s]
model-00001-of-00004.safetensors: 22%|ββββββββββββββββββ | 869M/3.95G [00:06<00:15, 204MB/s]
model-00001-of-00004.safetensors: 24%|βββββββββββββββββββ | 936M/3.95G [00:06<00:12, 244MB/s]
model-00001-of-00004.safetensors: 25%|ββββββββββββββββββββ | 1.00G/3.95G [00:06<00:10, 283MB/s]
model-00001-of-00004.safetensors: 27%|ββββββββββββββββββββββ | 1.08G/3.95G [00:08<00:20, 143MB/s]
model-00001-of-00004.safetensors: 29%|βββββββββββββββββββββββ | 1.16G/3.95G [00:08<00:16, 169MB/s]
model-00001-of-00004.safetensors: 31%|βββββββββββββββββββββββββ | 1.22G/3.95G [00:08<00:13, 208MB/s]
model-00001-of-00004.safetensors: 33%|ββββββββββββββββββββββββββ | 1.29G/3.95G [00:08<00:11, 230MB/s]
model-00001-of-00004.safetensors: 34%|βββββββββββββββββββββββββββ | 1.32G/3.95G [00:08<00:12, 209MB/s]
model-00001-of-00004.safetensors: 35%|ββββββββββββββββββββββββββββ | 1.39G/3.95G [00:09<00:11, 224MB/s]
model-00001-of-00004.safetensors: 37%|βββββββββββββββββββββββββββββ | 1.47G/3.95G [00:09<00:11, 216MB/s]
model-00001-of-00004.safetensors: 39%|βββββββββββββββββββββββββββββββ | 1.55G/3.95G [00:09<00:09, 248MB/s]
model-00001-of-00004.safetensors: 41%|ββββββββββββββββββββββββββββββββ | 1.62G/3.95G [00:09<00:08, 285MB/s]
model-00001-of-00004.safetensors: 42%|βββββββββββββββββββββββββββββββββ | 1.67G/3.95G [00:10<00:08, 279MB/s]
model-00001-of-00004.safetensors: 43%|ββββββββββββββββββββββββββββββββββ | 1.71G/3.95G [00:10<00:08, 249MB/s]
model-00001-of-00004.safetensors: 44%|βββββββββββββββββββββββββββββββββββ | 1.73G/3.95G [00:10<00:09, 222MB/s]
model-00001-of-00004.safetensors: 45%|ββββββββββββββββββββββββββββββββββββ | 1.78G/3.95G [00:10<00:08, 258MB/s]
model-00001-of-00004.safetensors: 47%|βββββββββββββββββββββββββββββββββββββ | 1.85G/3.95G [00:10<00:07, 263MB/s]
model-00001-of-00004.safetensors: 49%|ββββββββββββββββββββββββββββββββββββββ | 1.92G/3.95G [00:11<00:07, 264MB/s]
model-00001-of-00004.safetensors: 50%|ββββββββββββββββββββββββββββββββββββββββ | 1.99G/3.95G [00:11<00:07, 264MB/s]
model-00001-of-00004.safetensors: 51%|ββββββββββββββββββββββββββββββββββββββββ | 2.02G/3.95G [00:11<00:07, 252MB/s]
model-00001-of-00004.safetensors: 53%|ββββββββββββββββββββββββββββββββββββββββββ | 2.08G/3.95G [00:11<00:06, 300MB/s]
model-00001-of-00004.safetensors: 54%|βββββββββββββββββββββββββββββββββββββββββββ | 2.15G/3.95G [00:11<00:05, 337MB/s]
model-00001-of-00004.safetensors: 56%|ββββββββββββββββββββββββββββββββββββββββββββ | 2.22G/3.95G [00:12<00:05, 304MB/s]
model-00001-of-00004.safetensors: 58%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2.28G/3.95G [00:12<00:05, 295MB/s]
model-00001-of-00004.safetensors: 59%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2.33G/3.95G [00:12<00:06, 241MB/s]
model-00001-of-00004.safetensors: 61%|ββββββββββββββββββββββββββββββββββββββββββββββββ | 2.42G/3.95G [00:12<00:05, 299MB/s]
model-00001-of-00004.safetensors: 62%|βββββββββββββββββββββββββββββββββββββββββββββββββ | 2.46G/3.95G [00:13<00:05, 252MB/s]
model-00001-of-00004.safetensors: 64%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.54G/3.95G [00:13<00:04, 307MB/s]
model-00001-of-00004.safetensors: 66%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.60G/3.95G [00:13<00:04, 292MB/s]
model-00001-of-00004.safetensors: 68%|βββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.67G/3.95G [00:13<00:03, 322MB/s]
model-00001-of-00004.safetensors: 69%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.74G/3.95G [00:13<00:03, 353MB/s]
model-00001-of-00004.safetensors: 71%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.79G/3.95G [00:13<00:03, 372MB/s]
model-00001-of-00004.safetensors: 72%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.86G/3.95G [00:14<00:02, 410MB/s]
model-00001-of-00004.safetensors: 74%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.90G/3.95G [00:14<00:02, 419MB/s]
model-00001-of-00004.safetensors: 75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.97G/3.95G [00:14<00:02, 397MB/s]
model-00001-of-00004.safetensors: 77%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.04G/3.95G [00:14<00:02, 436MB/s]
model-00001-of-00004.safetensors: 78%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.10G/3.95G [00:14<00:01, 432MB/s]
model-00001-of-00004.safetensors: 80%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.16G/3.95G [00:14<00:01, 459MB/s]
model-00001-of-00004.safetensors: 82%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.23G/3.95G [00:14<00:01, 479MB/s]
model-00001-of-00004.safetensors: 84%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.30G/3.95G [00:15<00:01, 496MB/s]
model-00001-of-00004.safetensors: 85%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.36G/3.95G [00:15<00:01, 503MB/s]
model-00001-of-00004.safetensors: 87%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.43G/3.95G [00:15<00:01, 507MB/s]
model-00001-of-00004.safetensors: 89%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.50G/3.95G [00:15<00:00, 506MB/s]
model-00001-of-00004.safetensors: 90%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.57G/3.95G [00:15<00:00, 514MB/s]
model-00001-of-00004.safetensors: 93%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.68G/3.95G [00:15<00:00, 524MB/s]
model-00001-of-00004.safetensors: 95%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.74G/3.95G [00:15<00:00, 538MB/s]
model-00001-of-00004.safetensors: 97%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.81G/3.95G [00:15<00:00, 531MB/s]
model-00001-of-00004.safetensors: 98%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.88G/3.95G [00:16<00:00, 537MB/s]
model-00001-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.95G/3.95G [00:16<00:00, 529MB/s]
model-00001-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.95G/3.95G [00:16<00:00, 243MB/s]
model-00002-of-00004.safetensors: 0%| | 0.00/3.86G [00:00<?, ?B/s]
model-00002-of-00004.safetensors: 0%| | 3.45M/3.86G [00:01<19:55, 3.23MB/s]
model-00002-of-00004.safetensors: 3%|βββ | 107M/3.86G [00:01<00:39, 94.4MB/s]
model-00002-of-00004.safetensors: 5%|ββββ | 185M/3.86G [00:01<00:23, 154MB/s]
model-00002-of-00004.safetensors: 7%|ββββββ | 280M/3.86G [00:01<00:15, 224MB/s]
model-00002-of-00004.safetensors: 9%|βββββββ | 336M/3.86G [00:02<00:14, 250MB/s]
model-00002-of-00004.safetensors: 11%|βββββββββ | 425M/3.86G [00:02<00:14, 243MB/s]
model-00002-of-00004.safetensors: 13%|ββββββββββ | 492M/3.86G [00:02<00:12, 268MB/s]
model-00002-of-00004.safetensors: 14%|ββββββββββββ | 559M/3.86G [00:02<00:10, 302MB/s]
model-00002-of-00004.safetensors: 16%|βββββββββββββ | 626M/3.86G [00:02<00:09, 344MB/s]
model-00002-of-00004.safetensors: 18%|βββββββββββββββ | 697M/3.86G [00:03<00:08, 364MB/s]
model-00002-of-00004.safetensors: 20%|ββββββββββββββββ | 764M/3.86G [00:03<00:08, 354MB/s]
model-00002-of-00004.safetensors: 21%|βββββββββββββββββ | 831M/3.86G [00:03<00:07, 387MB/s]
model-00002-of-00004.safetensors: 23%|βββββββββββββββββββ | 898M/3.86G [00:03<00:07, 396MB/s]
model-00002-of-00004.safetensors: 25%|ββββββββββββββββββββ | 965M/3.86G [00:03<00:06, 423MB/s]
model-00002-of-00004.safetensors: 27%|βββββββββββββββββββββ | 1.03G/3.86G [00:04<00:08, 317MB/s]
model-00002-of-00004.safetensors: 28%|ββββββββββββββββββββββ | 1.08G/3.86G [00:04<00:11, 242MB/s]
model-00002-of-00004.safetensors: 29%|βββββββββββββββββββββββ | 1.13G/3.86G [00:04<00:10, 267MB/s]
model-00002-of-00004.safetensors: 31%|ββββββββββββββββββββββββ | 1.18G/3.86G [00:04<00:09, 298MB/s]
model-00002-of-00004.safetensors: 32%|βββββββββββββββββββββββββ | 1.24G/3.86G [00:04<00:07, 335MB/s]
model-00002-of-00004.safetensors: 33%|ββββββββββββββββββββββββββ | 1.29G/3.86G [00:04<00:08, 289MB/s]
model-00002-of-00004.safetensors: 35%|ββββββββββββββββββββββββββββ | 1.36G/3.86G [00:05<00:13, 184MB/s]
model-00002-of-00004.safetensors: 37%|βββββββββββββββββββββββββββββ | 1.41G/3.86G [00:05<00:11, 213MB/s]
model-00002-of-00004.safetensors: 38%|ββββββββββββββββββββββββββββββ | 1.48G/3.86G [00:05<00:09, 254MB/s]
model-00002-of-00004.safetensors: 40%|ββββββββββββββββββββββββββββββββ | 1.54G/3.86G [00:06<00:08, 277MB/s]
model-00002-of-00004.safetensors: 42%|βββββββββββββββββββββββββββββββββ | 1.61G/3.86G [00:06<00:07, 313MB/s]
model-00002-of-00004.safetensors: 43%|ββββββββββββββββββββββββββββββββββ | 1.68G/3.86G [00:06<00:06, 350MB/s]
model-00002-of-00004.safetensors: 45%|ββββββββββββββββββββββββββββββββββββ | 1.75G/3.86G [00:06<00:05, 377MB/s]
model-00002-of-00004.safetensors: 47%|βββββββββββββββββββββββββββββββββββββ | 1.81G/3.86G [00:06<00:05, 385MB/s]
model-00002-of-00004.safetensors: 49%|ββββββββββββββββββββββββββββββββββββββ | 1.88G/3.86G [00:06<00:04, 415MB/s]
model-00002-of-00004.safetensors: 50%|ββββββββββββββββββββββββββββββββββββββββ | 1.95G/3.86G [00:06<00:04, 426MB/s]
model-00002-of-00004.safetensors: 52%|βββββββββββββββββββββββββββββββββββββββββ | 2.01G/3.86G [00:07<00:04, 449MB/s]
model-00002-of-00004.safetensors: 54%|ββββββββββββββββββββββββββββββββββββββββββ | 2.08G/3.86G [00:07<00:03, 456MB/s]
model-00002-of-00004.safetensors: 56%|ββββββββββββββββββββββββββββββββββββββββββββ | 2.15G/3.86G [00:07<00:03, 478MB/s]
model-00002-of-00004.safetensors: 57%|βββββββββββββββββββββββββββββββββββββββββββββ | 2.22G/3.86G [00:07<00:03, 422MB/s]
model-00002-of-00004.safetensors: 59%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2.28G/3.86G [00:07<00:03, 439MB/s]
model-00002-of-00004.safetensors: 61%|ββββββββββββββββββββββββββββββββββββββββββββββββ | 2.35G/3.86G [00:07<00:03, 422MB/s]
model-00002-of-00004.safetensors: 63%|βββββββββββββββββββββββββββββββββββββββββββββββββ | 2.42G/3.86G [00:08<00:03, 370MB/s]
model-00002-of-00004.safetensors: 64%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.48G/3.86G [00:08<00:03, 364MB/s]
model-00002-of-00004.safetensors: 66%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.55G/3.86G [00:08<00:03, 380MB/s]
model-00002-of-00004.safetensors: 68%|βββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.62G/3.86G [00:08<00:03, 398MB/s]
model-00002-of-00004.safetensors: 69%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.69G/3.86G [00:08<00:02, 422MB/s]
model-00002-of-00004.safetensors: 71%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.75G/3.86G [00:08<00:02, 406MB/s]
model-00002-of-00004.safetensors: 73%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.82G/3.86G [00:09<00:02, 399MB/s]
model-00002-of-00004.safetensors: 75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.89G/3.86G [00:09<00:02, 403MB/s]
model-00002-of-00004.safetensors: 76%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.95G/3.86G [00:09<00:02, 441MB/s]
model-00002-of-00004.safetensors: 78%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.02G/3.86G [00:09<00:01, 457MB/s]
model-00002-of-00004.safetensors: 80%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.09G/3.86G [00:09<00:01, 474MB/s]
model-00002-of-00004.safetensors: 82%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.15G/3.86G [00:09<00:01, 465MB/s]
model-00002-of-00004.safetensors: 83%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.22G/3.86G [00:09<00:01, 474MB/s]
model-00002-of-00004.safetensors: 85%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.29G/3.86G [00:10<00:01, 487MB/s]
model-00002-of-00004.safetensors: 87%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.36G/3.86G [00:10<00:01, 480MB/s]
model-00002-of-00004.safetensors: 89%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.42G/3.86G [00:10<00:00, 480MB/s]
model-00002-of-00004.safetensors: 90%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.49G/3.86G [00:10<00:00, 476MB/s]
model-00002-of-00004.safetensors: 92%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.56G/3.86G [00:10<00:00, 485MB/s]
model-00002-of-00004.safetensors: 94%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.62G/3.86G [00:10<00:00, 454MB/s]
model-00002-of-00004.safetensors: 96%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.69G/3.86G [00:10<00:00, 461MB/s]
model-00002-of-00004.safetensors: 97%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.76G/3.86G [00:11<00:00, 475MB/s]
model-00002-of-00004.safetensors: 99%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.82G/3.86G [00:11<00:00, 446MB/s]
model-00002-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.86G/3.86G [00:11<00:00, 444MB/s]
model-00002-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.86G/3.86G [00:11<00:00, 341MB/s]
model-00003-of-00004.safetensors: 0%| | 0.00/3.86G [00:00<?, ?B/s]
model-00003-of-00004.safetensors: 0%| | 1.37M/3.86G [00:01<51:34, 1.25MB/s]
model-00003-of-00004.safetensors: 0%|β | 15.4M/3.86G [00:01<03:45, 17.1MB/s]
model-00003-of-00004.safetensors: 2%|ββ | 79.2M/3.86G [00:01<00:55, 67.9MB/s]
model-00003-of-00004.safetensors: 7%|ββββββ | 280M/3.86G [00:01<00:13, 258MB/s]
model-00003-of-00004.safetensors: 9%|βββββββ | 347M/3.86G [00:02<00:11, 295MB/s]
model-00003-of-00004.safetensors: 11%|βββββββββ | 414M/3.86G [00:02<00:12, 279MB/s]
model-00003-of-00004.safetensors: 12%|ββββββββββ | 478M/3.86G [00:02<00:12, 273MB/s]
model-00003-of-00004.safetensors: 14%|ββββββββββββ | 545M/3.86G [00:02<00:10, 319MB/s]
model-00003-of-00004.safetensors: 16%|βββββββββββββ | 612M/3.86G [00:02<00:09, 360MB/s]
model-00003-of-00004.safetensors: 18%|ββββββββββββββ | 678M/3.86G [00:02<00:08, 384MB/s]
model-00003-of-00004.safetensors: 19%|ββββββββββββββββ | 745M/3.86G [00:03<00:07, 418MB/s]
model-00003-of-00004.safetensors: 21%|βββββββββββββββββ | 812M/3.86G [00:03<00:09, 316MB/s]
model-00003-of-00004.safetensors: 23%|ββββββββββββββββββ | 879M/3.86G [00:03<00:08, 337MB/s]
model-00003-of-00004.safetensors: 24%|ββββββββββββββββββββ | 946M/3.86G [00:03<00:07, 373MB/s]
model-00003-of-00004.safetensors: 26%|βββββββββββββββββββββ | 1.01G/3.86G [00:04<00:09, 303MB/s]
model-00003-of-00004.safetensors: 28%|ββββββββββββββββββββββ | 1.07G/3.86G [00:04<00:09, 283MB/s]
model-00003-of-00004.safetensors: 29%|βββββββββββββββββββββββ | 1.13G/3.86G [00:04<00:10, 257MB/s]
model-00003-of-00004.safetensors: 30%|ββββββββββββββββββββββββ | 1.16G/3.86G [00:04<00:11, 227MB/s]
model-00003-of-00004.safetensors: 31%|ββββββββββββββββββββββββ | 1.19G/3.86G [00:05<00:14, 184MB/s]
model-00003-of-00004.safetensors: 31%|βββββββββββββββββββββββββ | 1.21G/3.86G [00:05<00:17, 154MB/s]
model-00003-of-00004.safetensors: 33%|ββββββββββββββββββββββββββ | 1.26G/3.86G [00:05<00:12, 200MB/s]
model-00003-of-00004.safetensors: 33%|ββββββββββββββββββββββββββ | 1.29G/3.86G [00:05<00:13, 192MB/s]
model-00003-of-00004.safetensors: 35%|ββββββββββββββββββββββββββββ | 1.35G/3.86G [00:05<00:10, 230MB/s]
model-00003-of-00004.safetensors: 37%|βββββββββββββββββββββββββββββ | 1.42G/3.86G [00:05<00:08, 283MB/s]
model-00003-of-00004.safetensors: 38%|ββββββββββββββββββββββββββββββ | 1.48G/3.86G [00:06<00:06, 345MB/s]
model-00003-of-00004.safetensors: 40%|ββββββββββββββββββββββββββββββββ | 1.55G/3.86G [00:06<00:05, 387MB/s]
model-00003-of-00004.safetensors: 42%|βββββββββββββββββββββββββββββββββ | 1.61G/3.86G [00:06<00:06, 343MB/s]
model-00003-of-00004.safetensors: 43%|ββββββββββββββββββββββββββββββββββ | 1.68G/3.86G [00:06<00:06, 356MB/s]
model-00003-of-00004.safetensors: 45%|ββββββββββββββββββββββββββββββββββββ | 1.75G/3.86G [00:06<00:05, 405MB/s]
model-00003-of-00004.safetensors: 47%|βββββββββββββββββββββββββββββββββββββ | 1.81G/3.86G [00:06<00:04, 427MB/s]
model-00003-of-00004.safetensors: 49%|ββββββββββββββββββββββββββββββββββββββ | 1.88G/3.86G [00:06<00:04, 429MB/s]
model-00003-of-00004.safetensors: 50%|ββββββββββββββββββββββββββββββββββββββββ | 1.95G/3.86G [00:07<00:04, 448MB/s]
model-00003-of-00004.safetensors: 52%|βββββββββββββββββββββββββββββββββββββββββ | 2.01G/3.86G [00:07<00:04, 448MB/s]
model-00003-of-00004.safetensors: 54%|ββββββββββββββββββββββββββββββββββββββββββ | 2.08G/3.86G [00:07<00:04, 380MB/s]
model-00003-of-00004.safetensors: 55%|ββββββββββββββββββββββββββββββββββββββββββββ | 2.14G/3.86G [00:07<00:05, 322MB/s]
model-00003-of-00004.safetensors: 57%|βββββββββββββββββββββββββββββββββββββββββββββ | 2.20G/3.86G [00:07<00:04, 362MB/s]
model-00003-of-00004.safetensors: 58%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2.26G/3.86G [00:08<00:04, 364MB/s]
model-00003-of-00004.safetensors: 60%|βββββββββββββββββββββββββββββββββββββββββββββββ | 2.31G/3.86G [00:08<00:04, 365MB/s]
model-00003-of-00004.safetensors: 61%|ββββββββββββββββββββββββββββββββββββββββββββββββ | 2.37G/3.86G [00:08<00:03, 389MB/s]
model-00003-of-00004.safetensors: 63%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.45G/3.86G [00:08<00:03, 385MB/s]
model-00003-of-00004.safetensors: 65%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.50G/3.86G [00:08<00:03, 402MB/s]
model-00003-of-00004.safetensors: 66%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.57G/3.86G [00:08<00:03, 427MB/s]
model-00003-of-00004.safetensors: 68%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.63G/3.86G [00:08<00:02, 442MB/s]
model-00003-of-00004.safetensors: 70%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.70G/3.86G [00:09<00:02, 454MB/s]
model-00003-of-00004.safetensors: 72%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.77G/3.86G [00:09<00:02, 463MB/s]
model-00003-of-00004.safetensors: 73%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.84G/3.86G [00:09<00:02, 471MB/s]
model-00003-of-00004.safetensors: 75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.90G/3.86G [00:09<00:02, 469MB/s]
model-00003-of-00004.safetensors: 77%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.97G/3.86G [00:09<00:01, 448MB/s]
model-00003-of-00004.safetensors: 78%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.03G/3.86G [00:09<00:01, 429MB/s]
model-00003-of-00004.safetensors: 80%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.10G/3.86G [00:09<00:01, 425MB/s]
model-00003-of-00004.safetensors: 82%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.17G/3.86G [00:10<00:01, 433MB/s]
model-00003-of-00004.safetensors: 84%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.24G/3.86G [00:10<00:01, 436MB/s]
model-00003-of-00004.safetensors: 85%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.30G/3.86G [00:10<00:01, 418MB/s]
model-00003-of-00004.safetensors: 87%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.37G/3.86G [00:10<00:01, 423MB/s]
model-00003-of-00004.safetensors: 89%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.44G/3.86G [00:10<00:01, 427MB/s]
model-00003-of-00004.safetensors: 91%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.50G/3.86G [00:10<00:00, 429MB/s]
model-00003-of-00004.safetensors: 92%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.57G/3.86G [00:11<00:00, 440MB/s]
model-00003-of-00004.safetensors: 94%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.64G/3.86G [00:11<00:00, 409MB/s]
model-00003-of-00004.safetensors: 96%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.71G/3.86G [00:11<00:00, 424MB/s]
model-00003-of-00004.safetensors: 98%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.78G/3.86G [00:11<00:00, 415MB/s]
model-00003-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.86G/3.86G [00:11<00:00, 392MB/s]
model-00003-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.86G/3.86G [00:11<00:00, 327MB/s]
model-00004-of-00004.safetensors: 0%| | 0.00/3.56G [00:00<?, ?B/s]
model-00004-of-00004.safetensors: 0%| | 9.42k/3.56G [00:01<163:05:23, 6.06kB/s]
model-00004-of-00004.safetensors: 1%|β | 44.7M/3.56G [00:01<01:50, 31.8MB/s]
model-00004-of-00004.safetensors: 4%|ββββ | 145M/3.56G [00:02<00:38, 87.6MB/s]
model-00004-of-00004.safetensors: 6%|βββββ | 202M/3.56G [00:02<00:30, 110MB/s]
model-00004-of-00004.safetensors: 8%|ββββββ | 269M/3.56G [00:02<00:21, 150MB/s]
model-00004-of-00004.safetensors: 9%|ββββββββ | 336M/3.56G [00:03<00:18, 177MB/s]
model-00004-of-00004.safetensors: 11%|βββββββββ | 403M/3.56G [00:03<00:15, 198MB/s]
model-00004-of-00004.safetensors: 13%|βββββββββββ | 470M/3.56G [00:03<00:14, 219MB/s]
model-00004-of-00004.safetensors: 15%|ββββββββββββ | 538M/3.56G [00:03<00:11, 253MB/s]
model-00004-of-00004.safetensors: 17%|ββββββββββββββ | 604M/3.56G [00:04<00:10, 279MB/s]
model-00004-of-00004.safetensors: 19%|βββββββββββββββ | 671M/3.56G [00:04<00:09, 313MB/s]
model-00004-of-00004.safetensors: 21%|βββββββββββββββββ | 738M/3.56G [00:04<00:09, 305MB/s]
model-00004-of-00004.safetensors: 23%|ββββββββββββββββββ | 805M/3.56G [00:04<00:08, 331MB/s]
model-00004-of-00004.safetensors: 25%|ββββββββββββββββββββ | 873M/3.56G [00:04<00:09, 284MB/s]
model-00004-of-00004.safetensors: 27%|ββββββββββββββββββββββ | 970M/3.56G [00:05<00:13, 197MB/s]
model-00004-of-00004.safetensors: 29%|βββββββββββββββββββββββ | 1.04G/3.56G [00:05<00:11, 228MB/s]
model-00004-of-00004.safetensors: 31%|ββββββββββββββββββββββββ | 1.09G/3.56G [00:06<00:10, 230MB/s]
model-00004-of-00004.safetensors: 32%|βββββββββββββββββββββββββ | 1.13G/3.56G [00:06<00:09, 244MB/s]
model-00004-of-00004.safetensors: 34%|βββββββββββββββββββββββββββ | 1.20G/3.56G [00:06<00:08, 265MB/s]
model-00004-of-00004.safetensors: 36%|ββββββββββββββββββββββββββββ | 1.27G/3.56G [00:06<00:07, 296MB/s]
model-00004-of-00004.safetensors: 37%|βββββββββββββββββββββββββββββ | 1.31G/3.56G [00:06<00:07, 306MB/s]
model-00004-of-00004.safetensors: 38%|ββββββββββββββββββββββββββββββ | 1.35G/3.56G [00:06<00:07, 300MB/s]
model-00004-of-00004.safetensors: 40%|βββββββββββββββββββββββββββββββ | 1.41G/3.56G [00:07<00:12, 174MB/s]
model-00004-of-00004.safetensors: 42%|βββββββββββββββββββββββββββββββββ | 1.48G/3.56G [00:07<00:10, 205MB/s]
model-00004-of-00004.safetensors: 43%|ββββββββββββββββββββββββββββββββββ | 1.54G/3.56G [00:07<00:08, 237MB/s]
model-00004-of-00004.safetensors: 45%|ββββββββββββββββββββββββββββββββββββ | 1.61G/3.56G [00:08<00:07, 265MB/s]
model-00004-of-00004.safetensors: 47%|βββββββββββββββββββββββββββββββββββββ | 1.68G/3.56G [00:08<00:06, 307MB/s]
model-00004-of-00004.safetensors: 49%|βββββββββββββββββββββββββββββββββββββββ | 1.75G/3.56G [00:08<00:05, 344MB/s]
model-00004-of-00004.safetensors: 51%|ββββββββββββββββββββββββββββββββββββββββ | 1.81G/3.56G [00:08<00:04, 364MB/s]
model-00004-of-00004.safetensors: 53%|ββββββββββββββββββββββββββββββββββββββββββ | 1.88G/3.56G [00:08<00:04, 380MB/s]
model-00004-of-00004.safetensors: 55%|βββββββββββββββββββββββββββββββββββββββββββ | 1.95G/3.56G [00:08<00:04, 393MB/s]
model-00004-of-00004.safetensors: 57%|βββββββββββββββββββββββββββββββββββββββββββββ | 2.01G/3.56G [00:08<00:03, 408MB/s]
model-00004-of-00004.safetensors: 58%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2.08G/3.56G [00:09<00:03, 386MB/s]
model-00004-of-00004.safetensors: 60%|βββββββββββββββββββββββββββββββββββββββββββββββ | 2.15G/3.56G [00:09<00:03, 408MB/s]
model-00004-of-00004.safetensors: 62%|βββββββββββββββββββββββββββββββββββββββββββββββββ | 2.20G/3.56G [00:09<00:03, 344MB/s]
model-00004-of-00004.safetensors: 63%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.25G/3.56G [00:09<00:03, 353MB/s]
model-00004-of-00004.safetensors: 65%|βββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.31G/3.56G [00:09<00:03, 358MB/s]
model-00004-of-00004.safetensors: 66%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.36G/3.56G [00:09<00:03, 359MB/s]
model-00004-of-00004.safetensors: 68%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.44G/3.56G [00:10<00:03, 366MB/s]
model-00004-of-00004.safetensors: 70%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.51G/3.56G [00:10<00:02, 359MB/s]
model-00004-of-00004.safetensors: 72%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.55G/3.56G [00:10<00:02, 355MB/s]
model-00004-of-00004.safetensors: 74%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.62G/3.56G [00:10<00:02, 381MB/s]
model-00004-of-00004.safetensors: 75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.68G/3.56G [00:10<00:02, 384MB/s]
model-00004-of-00004.safetensors: 77%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.75G/3.56G [00:10<00:02, 389MB/s]
model-00004-of-00004.safetensors: 79%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.82G/3.56G [00:11<00:01, 409MB/s]
model-00004-of-00004.safetensors: 81%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.89G/3.56G [00:11<00:01, 402MB/s]
model-00004-of-00004.safetensors: 83%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2.95G/3.56G [00:11<00:01, 401MB/s]
model-00004-of-00004.safetensors: 85%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.02G/3.56G [00:11<00:01, 415MB/s]
model-00004-of-00004.safetensors: 87%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.09G/3.56G [00:11<00:01, 423MB/s]
model-00004-of-00004.safetensors: 89%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.15G/3.56G [00:11<00:00, 423MB/s]
model-00004-of-00004.safetensors: 91%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.22G/3.56G [00:12<00:00, 416MB/s]
model-00004-of-00004.safetensors: 92%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.29G/3.56G [00:12<00:00, 431MB/s]
model-00004-of-00004.safetensors: 94%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.36G/3.56G [00:12<00:00, 417MB/s]
model-00004-of-00004.safetensors: 96%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.42G/3.56G [00:12<00:00, 425MB/s]
model-00004-of-00004.safetensors: 98%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3.49G/3.56G [00:12<00:00, 437MB/s]
model-00004-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.56G/3.56G [00:12<00:00, 445MB/s]
model-00004-of-00004.safetensors: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 3.56G/3.56G [00:12<00:00, 277MB/s]
Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]
Loading checkpoint shards: 25%|βββββββββββββββββββββββ | 1/4 [00:06<00:18, 6.20s/it]
Loading checkpoint shards: 50%|ββββββββββββββββββββββββββββββββββββββββββββββ | 2/4 [00:12<00:12, 6.48s/it]
Loading checkpoint shards: 75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 3/4 [00:20<00:06, 6.99s/it]
Loading checkpoint shards: 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:25<00:00, 6.17s/it]
Loading checkpoint shards: 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:25<00:00, 6.35s/it]
generation_config.json: 0%| | 0.00/243 [00:00<?, ?B/s]
generation_config.json: 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 243/243 [00:00<00:00, 2.95MB/s]
[2025-10-22 12:56:50,899] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:863] [PID:2418] converting PEFT model w/ prepare_model_for_kbit_training
[2025-10-22 12:56:50,901] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:2418] Converting modules to torch.bfloat16
[2025-10-22 12:56:50,903] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:2418] Memory usage after model load 11.676GB (+11.676GB allocated, +13.172GB reserved)
trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643
[2025-10-22 12:56:51,121] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:2418] after adapters 8.565GB (+8.565GB allocated, +13.248GB reserved)
[2025-10-22 12:56:57,347] [INFO] [axolotl.train.save_initial_configs:398] [PID:2418] Pre-saving adapter config to /workspace/fine-tuning/output...
[2025-10-22 12:56:57,347] [INFO] [axolotl.train.save_initial_configs:402] [PID:2418] Pre-saving tokenizer to /workspace/fine-tuning/output...
[2025-10-22 12:56:57,464] [INFO] [axolotl.train.save_initial_configs:407] [PID:2418] Pre-saving model config to /workspace/fine-tuning/output...
[2025-10-22 12:56:57,466] [INFO] [axolotl.train.execute_training:196] [PID:2418] Starting trainer...
0%| | 0/63 [00:00<?, ?it/s][2025-10-22 12:56:58,802] [WARNING] [py.warnings._showwarnmsg:110] [PID:2418] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/bitsandbytes/autograd/_functions.py:186: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization
warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization")
2%|ββ | 1/63 [00:04<04:23, 4.24s/it]
3%|ββββ | 2/63 [00:07<03:35, 3.53s/it]
5%|ββββββ | 3/63 [00:10<03:29, 3.50s/it]
6%|ββββββββ | 4/63 [00:13<03:15, 3.31s/it]
8%|ββββββββββ | 5/63 [00:16<03:06, 3.22s/it]
10%|ββββββββββββ | 6/63 [00:19<03:01, 3.19s/it]
11%|βββββββββββββ | 7/63 [00:22<02:50, 3.04s/it]
13%|βββββββββββββββ | 8/63 [00:25<02:46, 3.03s/it]
14%|βββββββββββββββββ | 9/63 [00:28<02:39, 2.95s/it]
16%|βββββββββββββββββββ | 10/63 [00:32<02:49, 3.20s/it]
{'loss': 2.8315, 'grad_norm': 0.5804110765457153, 'learning_rate': 0.00019357168190404936, 'memory/max_active (GiB)': 19.67, 'memory/max_allocated (GiB)': 19.67, 'memory/device_reserved (GiB)': 21.65, 'tokens_per_second_per_gpu': 1089.23, 'epoch': 0.48}
16%|βββββββββββββββββββ | 10/63 [00:32<02:49, 3.20s/it]
17%|βββββββββββββββββββββ | 11/63 [00:35<02:52, 3.32s/it]
19%|ββββββββββββββββββββββ | 12/63 [00:38<02:41, 3.17s/it]
21%|ββββββββββββββββββββββββ | 13/63 [00:41<02:33, 3.08s/it]
22%|ββββββββββββββββββββββββββ | 14/63 [00:44<02:23, 2.93s/it]
24%|ββββββββββββββββββββββββββββ | 15/63 [00:47<02:21, 2.95s/it]
25%|ββββββββββββββββββββββββββββββ | 16/63 [00:50<02:20, 2.98s/it]
27%|ββββββββββββββββββββββββββββββββ | 17/63 [00:52<02:13, 2.91s/it]
29%|ββββββββββββββββββββββββββββββββββ | 18/63 [00:55<02:11, 2.93s/it]
30%|βββββββββββββββββββββββββββββββββββ | 19/63 [00:58<02:06, 2.87s/it]
32%|βββββββββββββββββββββββββββββββββββββ | 20/63 [01:01<02:02, 2.85s/it]
{'loss': 2.5331, 'grad_norm': 0.5418248772621155, 'learning_rate': 0.00016405931786981755, 'memory/max_active (GiB)': 17.12, 'memory/max_allocated (GiB)': 17.12, 'memory/device_reserved (GiB)': 21.65, 'tokens_per_second_per_gpu': 1399.77, 'epoch': 0.95}
32%|βββββββββββββββββββββββββββββββββββββ | 20/63 [01:01<02:02, 2.85s/it]
33%|βββββββββββββββββββββββββββββββββββββββ | 21/63 [01:04<01:58, 2.82s/it]
35%|βββββββββββββββββββββββββββββββββββββββββ | 22/63 [01:07<02:00, 2.94s/it]
37%|βββββββββββββββββββββββββββββββββββββββββββ | 23/63 [01:09<01:53, 2.83s/it]
38%|βββββββββββββββββββββββββββββββββββββββββββββ | 24/63 [01:13<01:57, 3.01s/it]
40%|ββββββββββββββββββββββββββββββββββββββββββββββ | 25/63 [01:16<01:51, 2.93s/it]
41%|ββββββββββββββββββββββββββββββββββββββββββββββββ | 26/63 [01:18<01:46, 2.87s/it]
43%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 27/63 [01:21<01:45, 2.93s/it]
44%|ββββββββββββββββββββββββββββββββββββββββββββββββββββ | 28/63 [01:25<01:51, 3.19s/it]
46%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 29/63 [01:28<01:46, 3.15s/it]
48%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 30/63 [01:31<01:38, 2.97s/it]
{'loss': 2.2908, 'grad_norm': 0.926287829875946, 'learning_rate': 0.00011792807588107357, 'memory/max_active (GiB)': 19.67, 'memory/max_allocated (GiB)': 19.67, 'memory/device_reserved (GiB)': 21.66, 'tokens_per_second_per_gpu': 1503.62, 'epoch': 1.43}
48%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 30/63 [01:31<01:38, 2.97s/it]
49%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 31/63 [01:33<01:30, 2.84s/it]
51%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 32/63 [01:36<01:28, 2.85s/it]
52%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 33/63 [01:39<01:25, 2.84s/it]
54%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 34/63 [01:42<01:23, 2.87s/it]
56%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 35/63 [01:45<01:17, 2.78s/it]
57%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 36/63 [01:48<01:17, 2.87s/it]
59%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 37/63 [01:50<01:13, 2.84s/it]
60%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 38/63 [01:53<01:12, 2.90s/it]
62%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 39/63 [01:56<01:10, 2.93s/it]
63%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 40/63 [01:59<01:07, 2.95s/it]
{'loss': 2.3613, 'grad_norm': 0.741166353225708, 'learning_rate': 6.714576180891654e-05, 'memory/max_active (GiB)': 13.69, 'memory/max_allocated (GiB)': 13.69, 'memory/device_reserved (GiB)': 21.66, 'tokens_per_second_per_gpu': 1292.51, 'epoch': 1.9}
63%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 40/63 [01:59<01:07, 2.95s/it]
65%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 41/63 [02:02<01:05, 2.97s/it]
67%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 42/63 [02:05<01:01, 2.91s/it]
68%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 43/63 [02:08<01:00, 3.02s/it]
70%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 44/63 [02:12<01:00, 3.20s/it]
71%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 45/63 [02:15<00:56, 3.11s/it]
73%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 46/63 [02:18<00:52, 3.11s/it]
75%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 47/63 [02:21<00:49, 3.08s/it]
76%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 48/63 [02:25<00:48, 3.23s/it]
78%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 49/63 [02:28<00:43, 3.10s/it]
79%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 50/63 [02:31<00:39, 3.07s/it]
{'loss': 2.1182, 'grad_norm': 0.8406546115875244, 'learning_rate': 2.4886806912948035e-05, 'memory/max_active (GiB)': 19.67, 'memory/max_allocated (GiB)': 19.67, 'memory/device_reserved (GiB)': 21.66, 'tokens_per_second_per_gpu': 1380.03, 'epoch': 2.38}
79%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 50/63 [02:31<00:39, 3.07s/it]
81%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 51/63 [02:33<00:35, 2.97s/it]
83%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 52/63 [02:36<00:32, 2.93s/it]
84%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 53/63 [02:39<00:29, 2.92s/it]
86%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 54/63 [02:42<00:26, 2.92s/it]
87%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 55/63 [02:45<00:23, 2.91s/it]
89%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 56/63 [02:48<00:20, 2.91s/it]
90%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 57/63 [02:51<00:18, 3.04s/it]
92%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 58/63 [02:54<00:14, 2.99s/it]
94%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 59/63 [02:57<00:11, 2.95s/it]
95%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 60/63 [03:00<00:08, 2.95s/it]
{'loss': 2.1277, 'grad_norm': 0.7622830271720886, 'learning_rate': 2.1144314904642195e-06, 'memory/max_active (GiB)': 17.48, 'memory/max_allocated (GiB)': 17.48, 'memory/device_reserved (GiB)': 21.66, 'tokens_per_second_per_gpu': 1322.61, 'epoch': 2.86}
95%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 60/63 [03:00<00:08, 2.95s/it]
97%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 61/63 [03:03<00:05, 2.92s/it]
98%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 62/63 [03:05<00:02, 2.91s/it]
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 63/63 [03:08<00:00, 2.87s/it][2025-10-22 13:00:06,761] [INFO] [axolotl.core.trainers.base._save:671] [PID:2418] Saving model checkpoint to /workspace/fine-tuning/output/checkpoint-63
{'train_runtime': 189.206, 'train_samples_per_second': 5.328, 'train_steps_per_second': 0.333, 'train_loss': 2.3706827390761602, 'memory/max_active (GiB)': 13.15, 'memory/max_allocated (GiB)': 13.15, 'memory/device_reserved (GiB)': 21.66, 'epoch': 3.0}
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 63/63 [03:09<00:00, 2.87s/it]
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 63/63 [03:09<00:00, 3.00s/it]
[2025-10-22 13:00:08,876] [INFO] [axolotl.train.save_trained_model:218] [PID:2418] Training completed! Saving trained model to /workspace/fine-tuning/output.
[2025-10-22 13:00:09,025] [INFO] [axolotl.train.save_trained_model:336] [PID:2418] Model successfully saved to /workspace/fine-tuning/output
|