{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "60299a7f-6e86-4bd6-9dbf-250b42a264b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "==((====))== Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.6. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.27. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/Phi-3.5-mini-instruct\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "8712c5c8-c763-4743-bc8d-54b879433b73", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Already have LoRA adapters! We shall skip this step.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 32, # Choose any number > 0 ! 
Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 32,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 3407,\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "c9d36fef-4c62-412d-81a8-2769a1b56042", "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset\n", "dataset = load_dataset(\"arbinMichael/testparquet\", split = \"train\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "452ad49e-b283-4655-9c99-f30c5eed681c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|user|>Perpare a schedule for current charge/discharge test, the value of the current is a linear variable, using Current Ramp(A) control type. The charging current start value is 0.5A, the rate of change of the current per second is 0.01, up to 4V then ; discharge current start value is -0.5A, the rate of change of the current per second is -0.01, discharging to 1V then end the test. 
Record one point per second<|end|><|assistant|>[{\"StepCtrlTypeString\":\"Rest\",\"CtrlValue\":\"\",\"Label\":\"Step_A\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=5\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"0.5\",\"Label\":\"Step_B\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage>=4\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"-0.5\",\"Label\":\"Step_C\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage<=1\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]}]<|end|>\n" ] } ], "source": [ "print(dataset[5][\"text\"])" ] }, { "cell_type": "code", "execution_count": 6, "id": "b0a39d9e-e3bf-4fae-8d75-dba12ccf15c8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Set True to pack short sequences together (can make training up to 5x faster).\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 2,\n", " max_steps = 30,\n", " learning_rate = 2e-4,\n", " fp16 = not is_bfloat16_supported(),\n", " bf16 = is_bfloat16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 7444,\n", " output_dir = \"outputs\",\n", " ),\n", ")" ] }, { "cell_type": 
"code", "execution_count": null, "id": "625e8b31-82d8-4930-a46e-a82803b4f211", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 10 | Num Epochs = 30\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 30\n", " \"-____-\" Number of trainable parameters = 59,768,832\n" ] }, { "data": { "text/html": [ "\n", "
| Step | \n", "Training Loss | \n", "
|---|---|
| 1 | \n", "1.464500 | \n", "
| 2 | \n", "1.716400 | \n", "
| 3 | \n", "1.310200 | \n", "
| 4 | \n", "1.266100 | \n", "
| 5 | \n", "1.434100 | \n", "
| 6 | \n", "1.185100 | \n", "
| 7 | \n", "1.001900 | \n", "
| 8 | \n", "1.157100 | \n", "
| 9 | \n", "0.660400 | \n", "
| 10 | \n", "0.998400 | \n", "
| 11 | \n", "0.756400 | \n", "
| 12 | \n", "0.931600 | \n", "
| 13 | \n", "0.519000 | \n", "
"
],
"text/plain": [
"\", \"\", \"<0x00>\", \"<...\n",
"llama_model_loader: - kv 22: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n",
"llama_model_loader: - kv 23: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
"llama_model_loader: - kv 24: tokenizer.ggml.bos_token_id u32 = 1\n",
"llama_model_loader: - kv 25: tokenizer.ggml.eos_token_id u32 = 32000\n",
"llama_model_loader: - kv 26: tokenizer.ggml.unknown_token_id u32 = 0\n",
"llama_model_loader: - kv 27: tokenizer.ggml.padding_token_id u32 = 32009\n",
"llama_model_loader: - kv 28: tokenizer.ggml.add_bos_token bool = false\n",
"llama_model_loader: - kv 29: tokenizer.ggml.add_eos_token bool = false\n",
"llama_model_loader: - kv 30: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n",
"llama_model_loader: - kv 31: general.quantization_version u32 = 2\n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type bf16: 226 tensors\n",
"[ 1/ 291] token_embd.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q4_K .. size = 187.88 MiB -> 52.84 MiB\n",
"[ 2/ 291] blk.0.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 3/ 291] blk.0.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 4/ 291] blk.0.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 5/ 291] blk.0.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 6/ 291] blk.0.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 7/ 291] blk.0.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 8/ 291] blk.0.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 9/ 291] blk.0.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 10/ 291] blk.0.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 11/ 291] blk.1.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 12/ 291] blk.1.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 13/ 291] blk.1.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 14/ 291] blk.1.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 15/ 291] blk.1.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 16/ 291] blk.1.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 17/ 291] blk.1.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 18/ 291] blk.1.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 19/ 291] blk.1.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 20/ 291] blk.10.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 21/ 291] blk.10.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 22/ 291] blk.10.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 23/ 291] blk.10.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 24/ 291] blk.10.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 25/ 291] blk.10.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 26/ 291] blk.10.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 27/ 291] blk.10.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 28/ 291] blk.10.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 29/ 291] blk.11.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 30/ 291] blk.11.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 31/ 291] blk.11.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 32/ 291] blk.11.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 33/ 291] blk.11.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 34/ 291] blk.11.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 35/ 291] blk.11.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 36/ 291] blk.11.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 37/ 291] blk.11.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 38/ 291] blk.12.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 39/ 291] blk.12.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 40/ 291] blk.12.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 41/ 291] blk.12.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 42/ 291] blk.12.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 43/ 291] blk.12.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 44/ 291] blk.12.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 45/ 291] blk.12.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 46/ 291] blk.12.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 47/ 291] blk.13.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 48/ 291] blk.13.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 49/ 291] blk.13.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 50/ 291] blk.13.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 51/ 291] blk.13.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 52/ 291] blk.13.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 53/ 291] blk.13.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 54/ 291] blk.13.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 55/ 291] blk.13.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 56/ 291] blk.14.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 57/ 291] blk.14.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 58/ 291] blk.14.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 59/ 291] blk.14.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 60/ 291] blk.14.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 61/ 291] blk.14.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 62/ 291] blk.14.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 63/ 291] blk.14.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 64/ 291] blk.14.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 65/ 291] blk.15.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 66/ 291] blk.15.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 67/ 291] blk.15.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 68/ 291] blk.15.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 69/ 291] blk.15.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 70/ 291] blk.15.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 71/ 291] blk.15.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 72/ 291] blk.15.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 73/ 291] blk.15.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 74/ 291] blk.16.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 75/ 291] blk.16.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 76/ 291] blk.16.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 77/ 291] blk.16.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 78/ 291] blk.16.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 79/ 291] blk.16.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 80/ 291] blk.16.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 81/ 291] blk.16.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 82/ 291] blk.16.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 83/ 291] blk.17.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 84/ 291] blk.17.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 85/ 291] blk.17.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 86/ 291] blk.17.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 87/ 291] blk.17.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 88/ 291] blk.17.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 89/ 291] blk.17.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 90/ 291] blk.17.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 91/ 291] blk.17.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 92/ 291] blk.18.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 93/ 291] blk.18.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 94/ 291] blk.18.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 95/ 291] blk.18.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 96/ 291] blk.18.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 97/ 291] blk.18.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 98/ 291] blk.18.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 99/ 291] blk.18.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 100/ 291] blk.18.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 101/ 291] blk.19.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 102/ 291] blk.19.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 103/ 291] blk.19.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 104/ 291] blk.19.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 105/ 291] blk.19.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 106/ 291] blk.19.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 107/ 291] blk.19.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 108/ 291] blk.19.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 109/ 291] blk.19.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 110/ 291] blk.2.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 111/ 291] blk.2.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 112/ 291] blk.2.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 113/ 291] blk.2.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 114/ 291] blk.2.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 115/ 291] blk.2.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 116/ 291] blk.2.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 117/ 291] blk.2.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 118/ 291] blk.2.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 119/ 291] blk.20.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 120/ 291] blk.20.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 121/ 291] blk.20.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 122/ 291] blk.20.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 123/ 291] blk.20.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 124/ 291] blk.20.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 125/ 291] blk.20.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 126/ 291] blk.20.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 127/ 291] blk.20.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 128/ 291] blk.21.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 129/ 291] blk.21.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 130/ 291] blk.3.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 131/ 291] blk.3.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 132/ 291] blk.3.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 133/ 291] blk.3.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 134/ 291] blk.3.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 135/ 291] blk.3.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 136/ 291] blk.3.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 137/ 291] blk.3.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 138/ 291] blk.3.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 139/ 291] blk.4.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 140/ 291] blk.4.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 141/ 291] blk.4.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 142/ 291] blk.4.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 143/ 291] blk.4.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 144/ 291] blk.4.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 145/ 291] blk.4.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 146/ 291] blk.4.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 147/ 291] blk.4.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 148/ 291] blk.5.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 149/ 291] blk.5.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 150/ 291] blk.5.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 151/ 291] blk.5.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 152/ 291] blk.5.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 153/ 291] blk.5.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 154/ 291] blk.5.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 155/ 291] blk.5.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 156/ 291] blk.5.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 157/ 291] blk.6.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 158/ 291] blk.6.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 159/ 291] blk.6.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 160/ 291] blk.6.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 161/ 291] blk.6.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 162/ 291] blk.6.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 163/ 291] blk.6.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 164/ 291] blk.6.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 165/ 291] blk.6.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 166/ 291] blk.7.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 167/ 291] blk.7.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 168/ 291] blk.7.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 169/ 291] blk.7.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 170/ 291] blk.7.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 171/ 291] blk.7.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 172/ 291] blk.7.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 173/ 291] blk.7.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 174/ 291] blk.7.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 175/ 291] blk.8.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 176/ 291] blk.8.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 177/ 291] blk.8.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 178/ 291] blk.8.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 179/ 291] blk.8.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 180/ 291] blk.8.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 181/ 291] blk.8.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 182/ 291] blk.8.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 183/ 291] blk.8.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 184/ 291] blk.9.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 185/ 291] blk.9.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 186/ 291] blk.9.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 187/ 291] blk.9.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 188/ 291] blk.9.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 189/ 291] blk.9.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 190/ 291] blk.9.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 191/ 291] blk.9.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 192/ 291] blk.9.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 193/ 291] output.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q6_K .. size = 187.88 MiB -> 77.06 MiB\n",
"[ 194/ 291] blk.21.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 195/ 291] blk.21.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 196/ 291] blk.21.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 197/ 291] blk.21.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 198/ 291] blk.21.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 199/ 291] blk.21.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 200/ 291] blk.21.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 201/ 291] blk.22.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 202/ 291] blk.22.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 203/ 291] blk.22.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 204/ 291] blk.22.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 205/ 291] blk.22.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 206/ 291] blk.22.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 207/ 291] blk.22.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 208/ 291] blk.22.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 209/ 291] blk.22.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 210/ 291] blk.23.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 211/ 291] blk.23.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 212/ 291] blk.23.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 213/ 291] blk.23.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 214/ 291] blk.23.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 215/ 291] blk.23.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 216/ 291] blk.23.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 217/ 291] blk.23.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 218/ 291] blk.23.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 219/ 291] blk.24.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 220/ 291] blk.24.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 221/ 291] blk.24.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 222/ 291] blk.24.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 223/ 291] blk.24.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 224/ 291] blk.24.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 225/ 291] blk.24.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 226/ 291] blk.24.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 227/ 291] blk.24.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 228/ 291] blk.25.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 229/ 291] blk.25.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 230/ 291] blk.25.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 231/ 291] blk.25.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 232/ 291] blk.25.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 233/ 291] blk.25.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 234/ 291] blk.25.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 235/ 291] blk.25.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 236/ 291] blk.25.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 237/ 291] blk.26.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 238/ 291] blk.26.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 239/ 291] blk.26.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 240/ 291] blk.26.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 241/ 291] blk.26.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 242/ 291] blk.26.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 243/ 291] blk.26.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 244/ 291] blk.26.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 245/ 291] blk.26.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 246/ 291] blk.27.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 247/ 291] blk.27.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 248/ 291] blk.27.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 249/ 291] blk.27.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 250/ 291] blk.27.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 251/ 291] blk.27.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 252/ 291] blk.27.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 253/ 291] blk.27.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 254/ 291] blk.27.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 255/ 291] blk.28.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 256/ 291] blk.28.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 257/ 291] blk.28.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 258/ 291] blk.28.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 259/ 291] blk.28.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 260/ 291] blk.28.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 261/ 291] blk.28.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 262/ 291] blk.28.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 263/ 291] blk.28.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 264/ 291] blk.29.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 265/ 291] blk.29.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 266/ 291] blk.29.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 267/ 291] blk.29.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 268/ 291] blk.29.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 269/ 291] blk.29.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 270/ 291] blk.29.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 271/ 291] blk.29.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 272/ 291] blk.29.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 273/ 291] blk.30.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 274/ 291] blk.30.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 275/ 291] blk.30.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 276/ 291] blk.30.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 277/ 291] blk.30.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 278/ 291] blk.30.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 279/ 291] blk.30.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 280/ 291] blk.30.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 281/ 291] blk.30.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 282/ 291] blk.31.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 283/ 291] blk.31.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n",
"[ 284/ 291] blk.31.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 285/ 291] blk.31.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n",
"[ 286/ 291] blk.31.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"[ 287/ 291] blk.31.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 288/ 291] blk.31.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 289/ 291] blk.31.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n",
"[ 290/ 291] blk.31.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n",
"[ 291/ 291] output_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n",
"llama_model_quantize_internal: model size = 7288.51 MB\n",
"llama_model_quantize_internal: quant size = 2210.78 MB\n",
"\n",
"main: quantize time = 85490.82 ms\n",
"main: total time = 85490.83 ms\n",
"Unsloth: Conversion completed! Output location: ./testmodel/unsloth.Q4_K_M.gguf\n",
"Unsloth: Saved Ollama Modelfile to testmodel/Modelfile\n"
]
}
],
"source": [
"# Quantize and export the fine-tuned model to GGUF (method q4_k_m).\n",
"# Per the cell output above, this writes ./testmodel/unsloth.Q4_K_M.gguf\n",
"# and an Ollama Modelfile to testmodel/Modelfile.\n",
"model.save_pretrained_gguf(\"testmodel\", tokenizer, quantization_method = \"q4_k_m\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "de91a837-490d-468a-9d2e-08d24453dbfe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
"Unsloth: Will use up to 15.36 out of 28.5 RAM for saving.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████| 40/40 [01:22<00:00, 2.07s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unsloth: Saving tokenizer... Done.\n",
"Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n",
"Done.\n",
"==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
" \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n",
"O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n",
"\\ / [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.\n",
" \"-____-\" In total, you will have to wait at least 16 minutes.\n",
"\n",
"Unsloth: [0] Installing llama.cpp. This will take 3 minutes...\n",
"Unsloth: [1] Converting model at arbinMichael/phimed into bf16 GGUF format.\n",
"The output location will be ./arbinMichael/phimed/unsloth.BF16.gguf\n",
"This will take 3 minutes...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unsloth: Extending arbinMichael/phimed/tokenizer.model with added_tokens.json.\n",
"Originally tokenizer.model is of size (32000).\n",
"But we need to extend to sentencepiece vocab size (32011).\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:hf-to-gguf:Loading model: phimed\n",
"INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
"INFO:hf-to-gguf:Exporting model...\n",
"INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00006.safetensors'\n",
"INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n",
"INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00002-of-00006.safetensors'\n",
"INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00003-of-00006.safetensors'\n",
"INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00004-of-00006.safetensors'\n",
"INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.28.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.28.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.28.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.28.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00005-of-00006.safetensors'\n",
"INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.29.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.29.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.29.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.29.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.30.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.30.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.30.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.30.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.31.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.31.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.31.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.31.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.32.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.32.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.32.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.32.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.32.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.32.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.32.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.32.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.32.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.33.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.33.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.33.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.33.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.33.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.33.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.33.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.33.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.33.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.34.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.34.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.34.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.34.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.34.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.34.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.34.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.34.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.34.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.35.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.35.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.35.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.35.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.35.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.35.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:gguf: loading model part 'model-00006-of-00006.safetensors'\n",
"INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n",
"INFO:hf-to-gguf:blk.35.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.35.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.35.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.36.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.36.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.36.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.36.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.36.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.36.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.36.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.36.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.36.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.37.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.37.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.37.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.37.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.37.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.37.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.37.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.37.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.37.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.38.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.38.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.38.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.38.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.38.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.38.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.38.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.38.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.38.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.39.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.39.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n",
"INFO:hf-to-gguf:blk.39.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.39.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n",
"INFO:hf-to-gguf:blk.39.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:blk.39.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:blk.39.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.39.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n",
"INFO:hf-to-gguf:blk.39.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n",
"INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n",
"INFO:hf-to-gguf:Set meta model\n",
"INFO:hf-to-gguf:Set model parameters\n",
"INFO:hf-to-gguf:gguf: context length = 4096\n",
"INFO:hf-to-gguf:gguf: embedding length = 5120\n",
"INFO:hf-to-gguf:gguf: feed forward length = 17920\n",
"INFO:hf-to-gguf:gguf: head count = 40\n",
"INFO:hf-to-gguf:gguf: key-value head count = 10\n",
"INFO:hf-to-gguf:gguf: rope theta = 10000.0\n",
"INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n",
"INFO:hf-to-gguf:gguf: file type = 32\n",
"INFO:hf-to-gguf:Set model tokenizer\n",
"INFO:gguf.vocab:Setting special token type bos to 1\n",
"INFO:gguf.vocab:Setting special token type eos to 32000\n",
"INFO:gguf.vocab:Setting special token type unk to 0\n",
"INFO:gguf.vocab:Setting special token type pad to 32009\n",
"INFO:gguf.vocab:Setting add_bos_token to False\n",
"INFO:gguf.vocab:Setting add_eos_token to False\n",
"INFO:gguf.vocab:Setting chat_template to {% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n",
"' + message['content'] + '<|end|>\n",
"'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n",
"' + message['content'] + '<|end|>\n",
"'}}{% else %}{{'<|' + message['role'] + '|>\n",
"' + message['content'] + '<|end|>\n",
"'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n",
"' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|user|>\n",
"' + message['value'] + '<|end|>\n",
"'}}{% elif message['from'] == 'gpt' %}{{'<|assistant|>\n",
"' + message['value'] + '<|end|>\n",
"'}}{% else %}{{'<|' + message['from'] + '|>\n",
"' + message['value'] + '<|end|>\n",
"'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n",
"' }}{% endif %}{% endif %}\n",
"INFO:hf-to-gguf:Set model quantization version\n",
"INFO:gguf.gguf_writer:Writing the following files:\n",
"INFO:gguf.gguf_writer:arbinMichael/phimed/unsloth.BF16.gguf: n_tensors = 363, total_size = 27.9G\n",
"Writing: 100%|██████████| 27.9G/27.9G [04:26<00:00, 105Mbyte/s] \n",
"INFO:hf-to-gguf:Model successfully exported to arbinMichael/phimed/unsloth.BF16.gguf\n",
"Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.BF16.gguf\n",
"Unsloth: [2] Converting GGUF 16bit into q4_k_m. This will take 20 minutes...\n",
"main: build = 3651 (8f1d81a0)\n",
"main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n",
"main: quantizing './arbinMichael/phimed/unsloth.BF16.gguf' to './arbinMichael/phimed/unsloth.Q4_K_M.gguf' as Q4_K_M using 24 threads\n",
"llama_model_loader: loaded meta data with 34 key-value pairs and 363 tensors from ./arbinMichael/phimed/unsloth.BF16.gguf (version GGUF V3 (latest))\n",
"llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
"llama_model_loader: - kv 0: general.architecture str = llama\n",
"llama_model_loader: - kv 1: general.type str = model\n",
"llama_model_loader: - kv 2: general.name str = Phi 3 Medium 4k Instruct Bnb 4bit\n",
"llama_model_loader: - kv 3: general.organization str = Unsloth\n",
"llama_model_loader: - kv 4: general.finetune str = 4k-instruct-bnb-4bit\n",
"llama_model_loader: - kv 5: general.basename str = phi-3\n",
"llama_model_loader: - kv 6: general.size_label str = medium\n",
"llama_model_loader: - kv 7: llama.block_count u32 = 40\n",
"llama_model_loader: - kv 8: llama.context_length u32 = 4096\n",
"llama_model_loader: - kv 9: llama.embedding_length u32 = 5120\n",
"llama_model_loader: - kv 10: llama.feed_forward_length u32 = 17920\n",
"llama_model_loader: - kv 11: llama.attention.head_count u32 = 40\n",
"llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 10\n",
"llama_model_loader: - kv 13: llama.rope.freq_base f32 = 10000.000000\n",
"llama_model_loader: - kv 14: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n",
"llama_model_loader: - kv 15: llama.attention.key_length u32 = 128\n",
"llama_model_loader: - kv 16: llama.attention.value_length u32 = 128\n",
"llama_model_loader: - kv 17: general.file_type u32 = 32\n",
"llama_model_loader: - kv 18: llama.vocab_size u32 = 32064\n",
"llama_model_loader: - kv 19: llama.rope.dimension_count u32 = 128\n",
"llama_model_loader: - kv 20: tokenizer.ggml.add_space_prefix bool = false\n",
"llama_model_loader: - kv 21: tokenizer.ggml.model str = llama\n",
"llama_model_loader: - kv 22: tokenizer.ggml.pre str = default\n",
"llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,32064] = [\"\", \"\", \"<0x00>\", \"<...\n",
"llama_model_loader: - kv 24: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n",
"llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
"llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 1\n",
"llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 32000\n",
"llama_model_loader: - kv 28: tokenizer.ggml.unknown_token_id u32 = 0\n",
"llama_model_loader: - kv 29: tokenizer.ggml.padding_token_id u32 = 32009\n",
"llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false\n",
"llama_model_loader: - kv 31: tokenizer.ggml.add_eos_token bool = false\n",
"llama_model_loader: - kv 32: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n",
"llama_model_loader: - kv 33: general.quantization_version u32 = 2\n",
"llama_model_loader: - type f32: 81 tensors\n",
"llama_model_loader: - type bf16: 282 tensors\n",
"[ 1/ 363] token_embd.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q4_K .. size = 313.12 MiB -> 88.07 MiB\n",
"[ 2/ 363] blk.0.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 3/ 363] blk.0.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 4/ 363] blk.0.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 5/ 363] blk.0.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 6/ 363] blk.0.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 7/ 363] blk.0.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 8/ 363] blk.0.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 9/ 363] blk.0.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 10/ 363] blk.0.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 11/ 363] blk.1.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 12/ 363] blk.1.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 13/ 363] blk.1.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 14/ 363] blk.1.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 15/ 363] blk.1.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 16/ 363] blk.1.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 17/ 363] blk.1.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 18/ 363] blk.1.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 19/ 363] blk.1.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 20/ 363] blk.2.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 21/ 363] blk.2.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 22/ 363] blk.2.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 23/ 363] blk.2.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 24/ 363] blk.2.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 25/ 363] blk.2.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 26/ 363] blk.2.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 27/ 363] blk.2.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 28/ 363] blk.2.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 29/ 363] blk.3.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 30/ 363] blk.3.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 31/ 363] blk.3.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 32/ 363] blk.3.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 33/ 363] blk.3.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 34/ 363] blk.3.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 35/ 363] blk.3.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 36/ 363] blk.3.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 37/ 363] blk.3.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 38/ 363] blk.4.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 39/ 363] blk.4.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 40/ 363] blk.4.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 41/ 363] blk.4.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 42/ 363] blk.4.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 43/ 363] blk.4.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 44/ 363] blk.4.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 45/ 363] blk.4.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 46/ 363] blk.4.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 47/ 363] blk.5.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 48/ 363] blk.5.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 49/ 363] blk.5.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 50/ 363] blk.5.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 51/ 363] blk.5.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 52/ 363] blk.5.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 53/ 363] blk.5.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 54/ 363] blk.5.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 55/ 363] blk.5.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 56/ 363] blk.6.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 57/ 363] blk.6.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 58/ 363] blk.6.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 59/ 363] blk.6.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 60/ 363] blk.6.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 61/ 363] blk.6.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 62/ 363] blk.10.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 63/ 363] blk.10.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 64/ 363] blk.10.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 65/ 363] blk.10.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 66/ 363] blk.10.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 67/ 363] blk.10.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 68/ 363] blk.10.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 69/ 363] blk.10.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 70/ 363] blk.10.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 71/ 363] blk.11.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 72/ 363] blk.11.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 73/ 363] blk.11.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 74/ 363] blk.11.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 75/ 363] blk.11.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 76/ 363] blk.11.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 77/ 363] blk.11.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 78/ 363] blk.11.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 79/ 363] blk.11.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 80/ 363] blk.12.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 81/ 363] blk.12.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 82/ 363] blk.12.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 83/ 363] blk.12.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 84/ 363] blk.12.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 85/ 363] blk.12.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 86/ 363] blk.12.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 87/ 363] blk.12.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 88/ 363] blk.12.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 89/ 363] blk.13.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 90/ 363] blk.13.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 91/ 363] blk.13.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 92/ 363] blk.13.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 93/ 363] blk.13.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 94/ 363] blk.13.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 95/ 363] blk.13.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 96/ 363] blk.13.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 97/ 363] blk.13.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 98/ 363] blk.6.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 99/ 363] blk.6.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 100/ 363] blk.6.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 101/ 363] blk.7.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 102/ 363] blk.7.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 103/ 363] blk.7.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 104/ 363] blk.7.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 105/ 363] blk.7.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 106/ 363] blk.7.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 107/ 363] blk.7.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 108/ 363] blk.7.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 109/ 363] blk.7.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 110/ 363] blk.8.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 111/ 363] blk.8.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 112/ 363] blk.8.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 113/ 363] blk.8.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 114/ 363] blk.8.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 115/ 363] blk.8.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 116/ 363] blk.8.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 117/ 363] blk.8.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 118/ 363] blk.8.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 119/ 363] blk.9.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 120/ 363] blk.9.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 121/ 363] blk.9.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 122/ 363] blk.9.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 123/ 363] blk.9.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 124/ 363] blk.9.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 125/ 363] blk.9.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 126/ 363] blk.9.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 127/ 363] blk.9.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 128/ 363] blk.14.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 129/ 363] blk.14.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 130/ 363] blk.14.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 131/ 363] blk.14.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 132/ 363] blk.14.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 133/ 363] blk.14.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 134/ 363] blk.14.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 135/ 363] blk.14.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 136/ 363] blk.14.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 137/ 363] blk.15.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 138/ 363] blk.15.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 139/ 363] blk.15.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 140/ 363] blk.15.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 141/ 363] blk.15.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 142/ 363] blk.15.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 143/ 363] blk.15.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 144/ 363] blk.15.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 145/ 363] blk.15.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 146/ 363] blk.16.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 147/ 363] blk.16.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 148/ 363] blk.16.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 149/ 363] blk.16.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 150/ 363] blk.16.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 151/ 363] blk.16.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 152/ 363] blk.16.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 153/ 363] blk.16.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 154/ 363] blk.16.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 155/ 363] blk.17.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 156/ 363] blk.17.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 157/ 363] blk.17.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 158/ 363] blk.17.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 159/ 363] blk.17.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 160/ 363] blk.17.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 161/ 363] blk.17.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 162/ 363] blk.17.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 163/ 363] blk.17.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 164/ 363] blk.18.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 165/ 363] blk.18.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 166/ 363] blk.18.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 167/ 363] blk.18.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 168/ 363] blk.18.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 169/ 363] blk.18.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 170/ 363] blk.18.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 171/ 363] blk.18.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 172/ 363] blk.18.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 173/ 363] blk.19.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 174/ 363] blk.19.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 175/ 363] blk.19.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 176/ 363] blk.19.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 177/ 363] blk.19.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 178/ 363] blk.19.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 179/ 363] blk.19.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 180/ 363] blk.19.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 181/ 363] blk.19.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 182/ 363] blk.20.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 183/ 363] blk.20.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 184/ 363] blk.20.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 185/ 363] blk.20.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 186/ 363] blk.20.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 187/ 363] blk.20.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 188/ 363] blk.20.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 189/ 363] blk.20.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 190/ 363] blk.20.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 191/ 363] blk.21.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 192/ 363] blk.21.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 193/ 363] blk.21.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 194/ 363] blk.21.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 195/ 363] blk.21.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 196/ 363] blk.21.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 197/ 363] blk.21.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 198/ 363] blk.21.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 199/ 363] blk.21.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 200/ 363] blk.22.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 201/ 363] blk.22.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 202/ 363] blk.22.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 203/ 363] blk.22.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 204/ 363] blk.22.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 205/ 363] blk.22.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 206/ 363] blk.22.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 207/ 363] blk.22.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 208/ 363] blk.22.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 209/ 363] blk.23.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 210/ 363] blk.23.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 211/ 363] blk.23.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 212/ 363] blk.23.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 213/ 363] blk.23.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 214/ 363] blk.23.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 215/ 363] blk.23.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 216/ 363] blk.23.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 217/ 363] blk.23.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 218/ 363] blk.24.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 219/ 363] blk.24.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 220/ 363] blk.24.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 221/ 363] blk.24.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 222/ 363] blk.24.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 223/ 363] blk.24.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 224/ 363] blk.24.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 225/ 363] blk.24.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 226/ 363] blk.24.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 227/ 363] blk.25.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 228/ 363] blk.25.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 229/ 363] blk.25.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 230/ 363] blk.25.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 231/ 363] blk.25.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 232/ 363] blk.25.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 233/ 363] blk.25.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 234/ 363] blk.25.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 235/ 363] blk.25.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 236/ 363] blk.26.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 237/ 363] blk.26.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 238/ 363] blk.26.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 239/ 363] blk.26.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 240/ 363] blk.26.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 241/ 363] blk.26.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 242/ 363] blk.26.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 243/ 363] blk.26.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 244/ 363] blk.26.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 245/ 363] blk.27.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 246/ 363] blk.27.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 247/ 363] blk.27.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 248/ 363] blk.27.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 249/ 363] blk.27.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 250/ 363] blk.27.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 251/ 363] blk.27.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 252/ 363] blk.27.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 253/ 363] blk.27.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 254/ 363] blk.28.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 255/ 363] blk.28.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 256/ 363] blk.28.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 257/ 363] blk.28.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 258/ 363] blk.28.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 259/ 363] blk.28.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 260/ 363] blk.28.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 261/ 363] blk.28.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 262/ 363] blk.28.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 263/ 363] blk.29.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 264/ 363] blk.29.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 265/ 363] blk.29.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 266/ 363] blk.29.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 267/ 363] blk.29.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 268/ 363] blk.29.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 269/ 363] blk.29.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 270/ 363] blk.29.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 271/ 363] blk.29.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 272/ 363] blk.30.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 273/ 363] blk.30.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 274/ 363] blk.30.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 275/ 363] blk.30.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 276/ 363] blk.30.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 277/ 363] blk.30.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 278/ 363] blk.30.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 279/ 363] blk.30.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 280/ 363] blk.30.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 281/ 363] blk.31.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 282/ 363] blk.31.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 283/ 363] blk.31.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 284/ 363] blk.31.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 285/ 363] blk.31.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 286/ 363] blk.31.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 287/ 363] blk.31.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 288/ 363] blk.31.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 289/ 363] blk.31.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 290/ 363] blk.32.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 291/ 363] blk.32.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 292/ 363] blk.32.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 293/ 363] blk.32.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 294/ 363] blk.32.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 295/ 363] blk.32.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 296/ 363] blk.32.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 297/ 363] blk.32.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 298/ 363] blk.32.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 299/ 363] blk.33.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 300/ 363] blk.33.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 301/ 363] blk.33.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 302/ 363] blk.33.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 303/ 363] blk.33.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 304/ 363] blk.33.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 305/ 363] blk.33.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 306/ 363] blk.33.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 307/ 363] blk.33.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 308/ 363] blk.34.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 309/ 363] blk.34.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 310/ 363] blk.34.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 311/ 363] blk.34.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 312/ 363] blk.34.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 313/ 363] blk.34.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 314/ 363] blk.34.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 315/ 363] blk.34.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 316/ 363] blk.34.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 317/ 363] blk.35.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 318/ 363] blk.35.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 319/ 363] blk.35.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 320/ 363] blk.35.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 321/ 363] blk.35.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 322/ 363] blk.35.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 323/ 363] output.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q6_K .. size = 313.12 MiB -> 128.43 MiB\n",
"[ 324/ 363] blk.35.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 325/ 363] blk.35.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 326/ 363] blk.35.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 327/ 363] blk.36.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 328/ 363] blk.36.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 329/ 363] blk.36.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 330/ 363] blk.36.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 331/ 363] blk.36.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 332/ 363] blk.36.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 333/ 363] blk.36.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 334/ 363] blk.36.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 335/ 363] blk.36.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 336/ 363] blk.37.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 337/ 363] blk.37.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 338/ 363] blk.37.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 339/ 363] blk.37.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 340/ 363] blk.37.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 341/ 363] blk.37.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 342/ 363] blk.37.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 343/ 363] blk.37.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 344/ 363] blk.37.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 345/ 363] blk.38.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 346/ 363] blk.38.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 347/ 363] blk.38.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 348/ 363] blk.38.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 349/ 363] blk.38.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 350/ 363] blk.38.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 351/ 363] blk.38.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 352/ 363] blk.38.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 353/ 363] blk.38.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 354/ 363] blk.39.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 355/ 363] blk.39.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n",
"[ 356/ 363] blk.39.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 357/ 363] blk.39.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n",
"[ 358/ 363] blk.39.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"[ 359/ 363] blk.39.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n",
"[ 360/ 363] blk.39.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 361/ 363] blk.39.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n",
"[ 362/ 363] blk.39.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n",
"[ 363/ 363] output_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n",
"llama_model_quantize_internal: model size = 26627.83 MB\n",
"llama_model_quantize_internal: quant size = 8013.98 MB\n",
"\n",
"main: quantize time = 403335.17 ms\n",
"main: total time = 403335.17 ms\n",
"Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.Q4_K_M.gguf\n",
"Unsloth: Saved Ollama Modelfile to arbinMichael/phimed/Modelfile\n",
"Unsloth: Uploading GGUF to Huggingface Hub...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model\u001b[38;5;241m.\u001b[39mpush_to_hub_gguf(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marbinMichael/phimed\u001b[39m\u001b[38;5;124m\"\u001b[39m, tokenizer, quantization_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mq4_k_m\u001b[39m\u001b[38;5;124m\"\u001b[39m, token \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_vtFNaIAElPdJZAlrlodgjoAcnxofiuDpaH\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1830\u001b[0m, in \u001b[0;36munsloth_push_to_hub_gguf\u001b[0;34m(self, repo_id, tokenizer, quantization_method, first_conversion, use_temp_dir, commit_message, private, token, max_shard_size, create_pr, safe_serialization, revision, commit_description, tags, temporary_location, maximum_memory_usage)\u001b[0m\n\u001b[1;32m 1828\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_location \u001b[38;5;129;01min\u001b[39;00m all_file_locations:\n\u001b[1;32m 1829\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth: Uploading GGUF to Huggingface Hub...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1830\u001b[0m username \u001b[38;5;241m=\u001b[39m upload_to_huggingface(\n\u001b[1;32m 1831\u001b[0m \u001b[38;5;28mself\u001b[39m, repo_id, token,\n\u001b[1;32m 1832\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGGUF converted\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgguf\u001b[39m\u001b[38;5;124m\"\u001b[39m, file_location, old_username, private,\n\u001b[1;32m 1833\u001b[0m )\n\u001b[1;32m 1834\u001b[0m link \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00musername\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnew_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \\\n\u001b[1;32m 1835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m username \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m new_save_directory \u001b[38;5;28;01melse\u001b[39;00m \\\n\u001b[1;32m 1836\u001b[0m new_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 
1838\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSaved GGUF to https://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlink\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1416\u001b[0m, in \u001b[0;36mupload_to_huggingface\u001b[0;34m(model, save_directory, token, method, extra, file_location, old_username, private, create_config)\u001b[0m\n\u001b[1;32m 1413\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 1414\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m-> 1416\u001b[0m hf_api\u001b[38;5;241m.\u001b[39mupload_file(\n\u001b[1;32m 1417\u001b[0m path_or_fileobj \u001b[38;5;241m=\u001b[39m file_location,\n\u001b[1;32m 1418\u001b[0m path_in_repo \u001b[38;5;241m=\u001b[39m uploaded_location,\n\u001b[1;32m 1419\u001b[0m repo_id \u001b[38;5;241m=\u001b[39m save_directory,\n\u001b[1;32m 1420\u001b[0m repo_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1421\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(Trained with Unsloth)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1422\u001b[0m )\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;66;03m# We also upload a config.json file\u001b[39;00m\n\u001b[1;32m 1425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m create_config:\n",
"File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.