{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "60299a7f-6e86-4bd6-9dbf-250b42a264b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "==((====))== Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.6. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.27. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] } ], "source": [ "# Load the Phi-3.5-mini-instruct base model in 4-bit (QLoRA-ready) via Unsloth.\n", "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/Phi-3.5-mini-instruct\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "8712c5c8-c763-4743-bc8d-54b879433b73", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Already have LoRA adapters! We shall skip this step.\n" ] } ], "source": [ "# Attach LoRA adapters (rank 32) to the attention and MLP projection layers.\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 32, # Choose any number > 0 ! 
Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 32,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 3407,\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "c9d36fef-4c62-412d-81a8-2769a1b56042", "metadata": {}, "outputs": [], "source": [ "# Load the training split; each example carries a pre-formatted phi-3 style 'text' field (shown in the next cell).\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"arbinMichael/testparquet\", split = \"train\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "452ad49e-b283-4655-9c99-f30c5eed681c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|user|>Perpare a schedule for current charge/discharge test, the value of the current is a linear variable, using Current Ramp(A) control type. The charging current start value is 0.5A, the rate of change of the current per second is 0.01, up to 4V then ; discharge current start value is -0.5A, the rate of change of the current per second is -0.01, discharging to 1V then end the test. 
Record one point per second<|end|><|assistant|>[{\"StepCtrlTypeString\":\"Rest\",\"CtrlValue\":\"\",\"Label\":\"Step_A\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=5\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"0.5\",\"Label\":\"Step_B\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage>=4\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"-0.5\",\"Label\":\"Step_C\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage<=1\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]}]<|end|>\n" ] } ], "source": [ "# Show one raw training example to verify the phi-3 chat formatting.\n", "print(dataset[5][\"text\"])" ] }, { "cell_type": "code", "execution_count": 6, "id": "b0a39d9e-e3bf-4fae-8d75-dba12ccf15c8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "# Supervised fine-tuning on the raw 'text' field; max_steps = 30 overrides any epoch count (see stderr below).\n", "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 2,\n", " max_steps = 30,\n", " learning_rate = 2e-4,\n", " fp16 = not is_bfloat16_supported(),\n", " bf16 = is_bfloat16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 7444,\n", " output_dir = \"outputs\",\n", " ),\n", ")" ] }, { "cell_type": 
"code", "execution_count": null, "id": "625e8b31-82d8-4930-a46e-a82803b4f211", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 10 | Num Epochs = 30\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 30\n", " \"-____-\" Number of trainable parameters = 59,768,832\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [15/30 06:56 < 08:00, 0.03 it/s, Epoch 11.20/30]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
11.464500
21.716400
31.310200
41.266100
51.434100
61.185100
71.001900
81.157100
90.660400
100.998400
110.756400
120.931600
130.519000

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 13, "id": "c407b9c0-aa4c-412a-b7cc-ddbdbb6a5212", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\"StepCtrlTypeString\":\"Rest\",\"CtrlValue\":\"\",\"Label\":\"Step_A\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=10\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"PV_CHAN_Time>=60\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Internal Resistance\",\"CtrlValue\":\"0.5\",\"Label\":\"Step_B\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=3\",\"Goto\n" ] } ], "source": [ "# Inference demo: re-apply the phi-3 chat template (mapping translates ShareGPT-style keys), then stream a generation on GPU.\n", "from unsloth.chat_templates import get_chat_template\n", "\n", "tokenizer = get_chat_template(\n", " tokenizer,\n", " chat_template = \"phi-3\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n", " mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n", ")\n", "\n", "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", "\n", "messages = [\n", " {\"from\": \"human\", \"value\": \"give me schedule that first step will log every 10 second and will last 60seconds, and second step will log every 3 second and will last 40 seconds\"},\n", "]\n", "inputs = tokenizer.apply_chat_template(\n", " messages,\n", " tokenize = True,\n", " add_generation_prompt = True, # Must add for generation\n", " return_tensors = \"pt\",\n", ").to(\"cuda\")\n", "\n", "# NOTE(review): the printed schedule above stops mid-JSON - generation appears cut off at max_new_tokens = 128; raise it for complete schedules.\n", "from transformers import TextStreamer\n", "text_streamer = TextStreamer(tokenizer, skip_prompt = True)\n", "_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)" ] }, { "cell_type": "code", "execution_count": 8, "id": "069d4087-35c2-4d2e-b981-f5bc65bac44d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 15.71 out of 28.5 RAM for saving.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████| 32/32 [00:06<00:00, 4.69it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... Done.\n", "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n", "Done.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Converting llama model. Can use fast conversion = True.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: [0] Installing llama.cpp. 
This will take 3 minutes...\n", "Unsloth: [1] Converting model at testmodel into bf16 GGUF format.\n", "The output location will be ./testmodel/unsloth.BF16.gguf\n", "This will take 3 minutes...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Extending testmodel/tokenizer.model with added_tokens.json.\n", "Originally tokenizer.model is of size (32000).\n", "But we need to extend to sentencepiece vocab size (32011).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:hf-to-gguf:Loading model: testmodel\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00002.safetensors'\n", "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {3072, 32064}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", 
"INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 
8192}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, 
torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> 
F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00002-of-00002.safetensors'\n", 
"INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {3072, 32064}\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = 
{3072}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", 
"INFO:hf-to-gguf:blk.28.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 131072\n", "INFO:hf-to-gguf:gguf: embedding length = 3072\n", "INFO:hf-to-gguf:gguf: feed forward length = 8192\n", "INFO:hf-to-gguf:gguf: head count = 32\n", "INFO:hf-to-gguf:gguf: key-value head count = 32\n", "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 32\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:gguf.vocab:Setting special token type bos to 1\n", "INFO:gguf.vocab:Setting special token type eos to 32000\n", "INFO:gguf.vocab:Setting special token type unk to 0\n", "INFO:gguf.vocab:Setting special token type pad to 32009\n", "INFO:gguf.vocab:Setting add_bos_token to False\n", "INFO:gguf.vocab:Setting add_eos_token to False\n", "INFO:gguf.vocab:Setting chat_template to {% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n", "' + message['content'] + '<|end|>\n", "'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n", "' + 
message['content'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['role'] + '|>\n", "' + message['content'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|user|>\n", "' + message['value'] + '<|end|>\n", "'}}{% elif message['from'] == 'gpt' %}{{'<|assistant|>\n", "' + message['value'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['from'] + '|>\n", "' + message['value'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% endif %}\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:testmodel/unsloth.BF16.gguf: n_tensors = 291, total_size = 7.6G\n", "Writing: 100%|██████████| 7.64G/7.64G [00:54<00:00, 140Mbyte/s]\n", "INFO:hf-to-gguf:Model successfully exported to testmodel/unsloth.BF16.gguf\n", "Unsloth: Conversion completed! Output location: ./testmodel/unsloth.BF16.gguf\n", "Unsloth: [2] Converting GGUF 16bit into q4_k_m. This will take 20 minutes...\n", "main: build = 3651 (8f1d81a0)\n", "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n", "main: quantizing './testmodel/unsloth.BF16.gguf' to './testmodel/unsloth.Q4_K_M.gguf' as Q4_K_M using 24 threads\n", "llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from ./testmodel/unsloth.BF16.gguf (version GGUF V3 (latest))\n", "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = llama\n", "llama_model_loader: - kv 1: general.type str = model\n", "llama_model_loader: - kv 2: general.name str = Phi 3.5 Mini Instruct Bnb 4bit\n", "llama_model_loader: - kv 3: general.organization str = Unsloth\n", "llama_model_loader: - kv 4: general.finetune str = instruct-bnb-4bit\n", "llama_model_loader: - kv 5: general.basename str = phi-3.5\n", "llama_model_loader: - kv 6: general.size_label str = mini\n", "llama_model_loader: - kv 7: llama.block_count u32 = 32\n", "llama_model_loader: - kv 8: llama.context_length u32 = 131072\n", "llama_model_loader: - kv 9: llama.embedding_length u32 = 3072\n", "llama_model_loader: - kv 10: llama.feed_forward_length u32 = 8192\n", "llama_model_loader: - kv 11: llama.attention.head_count u32 = 32\n", "llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 32\n", "llama_model_loader: - kv 13: llama.rope.freq_base f32 = 10000.000000\n", "llama_model_loader: - kv 14: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", "llama_model_loader: - kv 15: general.file_type u32 = 32\n", "llama_model_loader: - kv 16: llama.vocab_size u32 = 32064\n", "llama_model_loader: - kv 17: llama.rope.dimension_count u32 = 96\n", "llama_model_loader: - kv 18: tokenizer.ggml.add_space_prefix bool = false\n", "llama_model_loader: - kv 19: tokenizer.ggml.model str = llama\n", "llama_model_loader: - kv 20: tokenizer.ggml.pre str = default\n", "llama_model_loader: - kv 21: tokenizer.ggml.tokens arr[str,32064] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", "llama_model_loader: - kv 22: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n", "llama_model_loader: - kv 23: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", "llama_model_loader: - kv 24: tokenizer.ggml.bos_token_id u32 = 1\n", "llama_model_loader: - kv 25: tokenizer.ggml.eos_token_id u32 = 32000\n", 
"llama_model_loader: - kv 26: tokenizer.ggml.unknown_token_id u32 = 0\n", "llama_model_loader: - kv 27: tokenizer.ggml.padding_token_id u32 = 32009\n", "llama_model_loader: - kv 28: tokenizer.ggml.add_bos_token bool = false\n", "llama_model_loader: - kv 29: tokenizer.ggml.add_eos_token bool = false\n", "llama_model_loader: - kv 30: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n", "llama_model_loader: - kv 31: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 65 tensors\n", "llama_model_loader: - type bf16: 226 tensors\n", "[ 1/ 291] token_embd.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q4_K .. size = 187.88 MiB -> 52.84 MiB\n", "[ 2/ 291] blk.0.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 3/ 291] blk.0.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 4/ 291] blk.0.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 5/ 291] blk.0.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 6/ 291] blk.0.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 7/ 291] blk.0.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 8/ 291] blk.0.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 9/ 291] blk.0.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 10/ 291] blk.0.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 11/ 291] blk.1.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 12/ 291] blk.1.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 48.00 MiB -> 19.69 MiB\n", "[ 13/ 291] blk.1.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 14/ 291] blk.1.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 15/ 291] blk.1.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 16/ 291] blk.1.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 17/ 291] blk.1.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 18/ 291] blk.1.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 19/ 291] blk.1.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 20/ 291] blk.10.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 21/ 291] blk.10.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 22/ 291] blk.10.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 23/ 291] blk.10.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 24/ 291] blk.10.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 25/ 291] blk.10.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 26/ 291] blk.10.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 27/ 291] blk.10.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 28/ 291] blk.10.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 29/ 291] blk.11.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 30/ 291] blk.11.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 31/ 291] blk.11.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 32/ 291] blk.11.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 33/ 291] blk.11.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 34/ 291] blk.11.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 35/ 291] blk.11.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 36/ 291] blk.11.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 37/ 291] blk.11.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 38/ 291] blk.12.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 39/ 291] blk.12.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 40/ 291] blk.12.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 41/ 291] blk.12.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 42/ 291] blk.12.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 43/ 291] blk.12.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 44/ 291] blk.12.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 45/ 291] blk.12.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 46/ 291] blk.12.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 47/ 291] blk.13.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 48/ 291] blk.13.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 49/ 291] blk.13.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 50/ 291] blk.13.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 51/ 291] blk.13.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 52/ 291] blk.13.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 53/ 291] blk.13.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 54/ 291] blk.13.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 55/ 291] blk.13.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 56/ 291] blk.14.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 57/ 291] blk.14.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 58/ 291] blk.14.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 59/ 291] blk.14.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 60/ 291] blk.14.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 61/ 291] blk.14.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 62/ 291] blk.14.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 63/ 291] blk.14.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 64/ 291] blk.14.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 65/ 291] blk.15.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 66/ 291] blk.15.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 67/ 291] blk.15.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 68/ 291] blk.15.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 69/ 291] blk.15.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 70/ 291] blk.15.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 71/ 291] blk.15.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 72/ 291] blk.15.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 73/ 291] blk.15.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 74/ 291] blk.16.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 75/ 291] blk.16.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 76/ 291] blk.16.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 77/ 291] blk.16.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 78/ 291] blk.16.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 79/ 291] blk.16.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 80/ 291] blk.16.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 81/ 291] blk.16.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 82/ 291] blk.16.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 83/ 291] blk.17.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 84/ 291] blk.17.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 85/ 291] blk.17.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 86/ 291] blk.17.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 87/ 291] blk.17.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 88/ 291] blk.17.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 89/ 291] blk.17.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 90/ 291] blk.17.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 91/ 291] blk.17.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 92/ 291] blk.18.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 93/ 291] blk.18.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 94/ 291] blk.18.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 95/ 291] blk.18.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 96/ 291] blk.18.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 97/ 291] blk.18.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 98/ 291] blk.18.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 99/ 291] blk.18.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 100/ 291] blk.18.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 101/ 291] blk.19.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 102/ 291] blk.19.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 103/ 291] blk.19.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 104/ 291] blk.19.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 105/ 291] blk.19.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 106/ 291] blk.19.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 107/ 291] blk.19.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 108/ 291] blk.19.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 109/ 291] blk.19.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 110/ 291] blk.2.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 111/ 291] blk.2.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 112/ 291] blk.2.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 113/ 291] blk.2.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 114/ 291] blk.2.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 115/ 291] blk.2.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 116/ 291] blk.2.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 117/ 291] blk.2.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 118/ 291] blk.2.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 119/ 291] blk.20.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 120/ 291] blk.20.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 121/ 291] blk.20.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 122/ 291] blk.20.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 123/ 291] blk.20.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 124/ 291] blk.20.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 125/ 291] blk.20.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 126/ 291] blk.20.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 127/ 291] blk.20.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 128/ 291] blk.21.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 129/ 291] blk.21.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 130/ 291] blk.3.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 131/ 291] blk.3.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 132/ 291] blk.3.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 133/ 291] blk.3.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 134/ 291] blk.3.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 135/ 291] blk.3.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 136/ 291] blk.3.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 137/ 291] blk.3.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 138/ 291] blk.3.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 139/ 291] blk.4.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 140/ 291] blk.4.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 141/ 291] blk.4.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 142/ 291] blk.4.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 143/ 291] blk.4.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 144/ 291] blk.4.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 145/ 291] blk.4.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 146/ 291] blk.4.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 147/ 291] blk.4.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 148/ 291] blk.5.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 149/ 291] blk.5.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 150/ 291] blk.5.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 151/ 291] blk.5.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 152/ 291] blk.5.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 153/ 291] blk.5.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 154/ 291] blk.5.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 155/ 291] blk.5.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 156/ 291] blk.5.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 157/ 291] blk.6.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 158/ 291] blk.6.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 159/ 291] blk.6.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 160/ 291] blk.6.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 161/ 291] blk.6.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 162/ 291] blk.6.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 163/ 291] blk.6.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 164/ 291] blk.6.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 165/ 291] blk.6.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 166/ 291] blk.7.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 167/ 291] blk.7.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 168/ 291] blk.7.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 169/ 291] blk.7.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 170/ 291] blk.7.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 171/ 291] blk.7.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 172/ 291] blk.7.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 173/ 291] blk.7.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 174/ 291] blk.7.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 175/ 291] blk.8.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 176/ 291] blk.8.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 177/ 291] blk.8.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 178/ 291] blk.8.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 179/ 291] blk.8.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 180/ 291] blk.8.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 181/ 291] blk.8.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 182/ 291] blk.8.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 183/ 291] blk.8.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 184/ 291] blk.9.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 185/ 291] blk.9.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 186/ 291] blk.9.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 187/ 291] blk.9.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 188/ 291] blk.9.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 189/ 291] blk.9.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 190/ 291] blk.9.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 191/ 291] blk.9.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 192/ 291] blk.9.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 193/ 291] output.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q6_K .. size = 187.88 MiB -> 77.06 MiB\n", "[ 194/ 291] blk.21.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 195/ 291] blk.21.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 48.00 MiB -> 19.69 MiB\n", "[ 196/ 291] blk.21.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 197/ 291] blk.21.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 198/ 291] blk.21.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 199/ 291] blk.21.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 200/ 291] blk.21.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 201/ 291] blk.22.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 202/ 291] blk.22.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 203/ 291] blk.22.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 204/ 291] blk.22.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 205/ 291] blk.22.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 206/ 291] blk.22.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 207/ 291] blk.22.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 208/ 291] blk.22.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 209/ 291] blk.22.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 210/ 291] blk.23.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 211/ 291] blk.23.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 212/ 291] blk.23.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 213/ 291] blk.23.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 214/ 291] blk.23.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 215/ 291] blk.23.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 216/ 291] blk.23.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 217/ 291] blk.23.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 218/ 291] blk.23.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 219/ 291] blk.24.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 220/ 291] blk.24.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 221/ 291] blk.24.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 222/ 291] blk.24.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 223/ 291] blk.24.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 224/ 291] blk.24.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 225/ 291] blk.24.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 226/ 291] blk.24.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 227/ 291] blk.24.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 228/ 291] blk.25.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 229/ 291] blk.25.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 230/ 291] blk.25.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 231/ 291] blk.25.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 232/ 291] blk.25.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 233/ 291] blk.25.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 234/ 291] blk.25.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 235/ 291] blk.25.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 236/ 291] blk.25.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 237/ 291] blk.26.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 238/ 291] blk.26.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 239/ 291] blk.26.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 240/ 291] blk.26.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 241/ 291] blk.26.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 242/ 291] blk.26.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 243/ 291] blk.26.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 244/ 291] blk.26.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 245/ 291] blk.26.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 246/ 291] blk.27.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 247/ 291] blk.27.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 248/ 291] blk.27.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 249/ 291] blk.27.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 250/ 291] blk.27.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 251/ 291] blk.27.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 252/ 291] blk.27.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 253/ 291] blk.27.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 254/ 291] blk.27.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 255/ 291] blk.28.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 256/ 291] blk.28.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 257/ 291] blk.28.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 258/ 291] blk.28.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 259/ 291] blk.28.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 260/ 291] blk.28.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 261/ 291] blk.28.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 262/ 291] blk.28.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 263/ 291] blk.28.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 264/ 291] blk.29.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 265/ 291] blk.29.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 266/ 291] blk.29.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 267/ 291] blk.29.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 268/ 291] blk.29.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 269/ 291] blk.29.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 270/ 291] blk.29.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 271/ 291] blk.29.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 272/ 291] blk.29.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 273/ 291] blk.30.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 274/ 291] blk.30.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 275/ 291] blk.30.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 276/ 291] blk.30.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 277/ 291] blk.30.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 278/ 291] blk.30.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 279/ 291] blk.30.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 280/ 291] blk.30.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 281/ 291] blk.30.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 282/ 291] blk.31.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 283/ 291] blk.31.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 284/ 291] blk.31.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 285/ 291] blk.31.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 286/ 291] blk.31.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 287/ 291] blk.31.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 288/ 291] blk.31.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 289/ 291] blk.31.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 290/ 291] blk.31.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 291/ 291] output_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "llama_model_quantize_internal: model size = 7288.51 MB\n", "llama_model_quantize_internal: quant size = 2210.78 MB\n", "\n", "main: quantize time = 85490.82 ms\n", "main: total time = 85490.83 ms\n", "Unsloth: Conversion completed! Output location: ./testmodel/unsloth.Q4_K_M.gguf\n", "Unsloth: Saved Ollama Modelfile to testmodel/Modelfile\n" ] } ], "source": [ "model.save_pretrained_gguf(\"testmodel\", tokenizer, quantization_method = \"q4_k_m\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "de91a837-490d-468a-9d2e-08d24453dbfe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 15.36 out of 28.5 RAM for saving.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████| 40/40 [01:22<00:00, 2.07s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... Done.\n", "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n", "Done.\n", "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: [0] Installing llama.cpp. 
This will take 3 minutes...\n", "Unsloth: [1] Converting model at arbinMichael/phimed into bf16 GGUF format.\n", "The output location will be ./arbinMichael/phimed/unsloth.BF16.gguf\n", "This will take 3 minutes...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Extending arbinMichael/phimed/tokenizer.model with added_tokens.json.\n", "Originally tokenizer.model is of size (32000).\n", "But we need to extend to sentencepiece vocab size (32011).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:hf-to-gguf:Loading model: phimed\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00006.safetensors'\n", "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 
17920}\n", "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, 
torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00002-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, 
torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", 
"INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 
--> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00003-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", 
"INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", 
"INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00004-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, 
torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = 
{5120, 5120}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.28.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", 
"INFO:hf-to-gguf:blk.28.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00005-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.30.attn_output.weight, 
torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.32.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.32.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.32.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.32.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.32.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.32.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.32.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.32.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.32.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.33.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.33.ffn_down.weight, torch.bfloat16 --> BF16, shape = 
{17920, 5120}\n", "INFO:hf-to-gguf:blk.33.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.33.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.33.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.33.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.33.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.33.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.33.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.34.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.34.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.34.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.34.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.34.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.34.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.34.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.34.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.34.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.35.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.35.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.35.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.35.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.35.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.35.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:gguf: loading model part 'model-00006-of-00006.safetensors'\n", "INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n", "INFO:hf-to-gguf:blk.35.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.35.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.35.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.36.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.36.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.36.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.36.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.36.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.36.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.37.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.37.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.37.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.37.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.37.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.37.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.37.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.37.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.37.attn_v.weight, torch.bfloat16 
--> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.38.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.38.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.38.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.38.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.38.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.38.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.38.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.38.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.38.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.39.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.39.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.39.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.39.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.39.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.39.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.39.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.39.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.39.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 4096\n", "INFO:hf-to-gguf:gguf: embedding length = 5120\n", "INFO:hf-to-gguf:gguf: feed forward length = 17920\n", "INFO:hf-to-gguf:gguf: head count = 40\n", 
"INFO:hf-to-gguf:gguf: key-value head count = 10\n", "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 32\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:gguf.vocab:Setting special token type bos to 1\n", "INFO:gguf.vocab:Setting special token type eos to 32000\n", "INFO:gguf.vocab:Setting special token type unk to 0\n", "INFO:gguf.vocab:Setting special token type pad to 32009\n", "INFO:gguf.vocab:Setting add_bos_token to False\n", "INFO:gguf.vocab:Setting add_eos_token to False\n", "INFO:gguf.vocab:Setting chat_template to {% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n", "' + message['content'] + '<|end|>\n", "'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n", "' + message['content'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['role'] + '|>\n", "' + message['content'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|user|>\n", "' + message['value'] + '<|end|>\n", "'}}{% elif message['from'] == 'gpt' %}{{'<|assistant|>\n", "' + message['value'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['from'] + '|>\n", "' + message['value'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% endif %}\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:arbinMichael/phimed/unsloth.BF16.gguf: n_tensors = 363, total_size = 27.9G\n", "Writing: 100%|██████████| 27.9G/27.9G [04:26<00:00, 105Mbyte/s] \n", "INFO:hf-to-gguf:Model successfully exported to arbinMichael/phimed/unsloth.BF16.gguf\n", "Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.BF16.gguf\n", "Unsloth: [2] Converting GGUF 16bit into q4_k_m. 
This will take 20 minutes...\n", "main: build = 3651 (8f1d81a0)\n", "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n", "main: quantizing './arbinMichael/phimed/unsloth.BF16.gguf' to './arbinMichael/phimed/unsloth.Q4_K_M.gguf' as Q4_K_M using 24 threads\n", "llama_model_loader: loaded meta data with 34 key-value pairs and 363 tensors from ./arbinMichael/phimed/unsloth.BF16.gguf (version GGUF V3 (latest))\n", "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = llama\n", "llama_model_loader: - kv 1: general.type str = model\n", "llama_model_loader: - kv 2: general.name str = Phi 3 Medium 4k Instruct Bnb 4bit\n", "llama_model_loader: - kv 3: general.organization str = Unsloth\n", "llama_model_loader: - kv 4: general.finetune str = 4k-instruct-bnb-4bit\n", "llama_model_loader: - kv 5: general.basename str = phi-3\n", "llama_model_loader: - kv 6: general.size_label str = medium\n", "llama_model_loader: - kv 7: llama.block_count u32 = 40\n", "llama_model_loader: - kv 8: llama.context_length u32 = 4096\n", "llama_model_loader: - kv 9: llama.embedding_length u32 = 5120\n", "llama_model_loader: - kv 10: llama.feed_forward_length u32 = 17920\n", "llama_model_loader: - kv 11: llama.attention.head_count u32 = 40\n", "llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 10\n", "llama_model_loader: - kv 13: llama.rope.freq_base f32 = 10000.000000\n", "llama_model_loader: - kv 14: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", "llama_model_loader: - kv 15: llama.attention.key_length u32 = 128\n", "llama_model_loader: - kv 16: llama.attention.value_length u32 = 128\n", "llama_model_loader: - kv 17: general.file_type u32 = 32\n", "llama_model_loader: - kv 18: llama.vocab_size u32 = 32064\n", "llama_model_loader: - kv 19: llama.rope.dimension_count u32 = 128\n", "llama_model_loader: - kv 20: tokenizer.ggml.add_space_prefix 
bool = false\n", "llama_model_loader: - kv 21: tokenizer.ggml.model str = llama\n", "llama_model_loader: - kv 22: tokenizer.ggml.pre str = default\n", "llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,32064] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", "llama_model_loader: - kv 24: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n", "llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", "llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 1\n", "llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 32000\n", "llama_model_loader: - kv 28: tokenizer.ggml.unknown_token_id u32 = 0\n", "llama_model_loader: - kv 29: tokenizer.ggml.padding_token_id u32 = 32009\n", "llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false\n", "llama_model_loader: - kv 31: tokenizer.ggml.add_eos_token bool = false\n", "llama_model_loader: - kv 32: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n", "llama_model_loader: - kv 33: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 81 tensors\n", "llama_model_loader: - type bf16: 282 tensors\n", "[ 1/ 363] token_embd.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q4_K .. size = 313.12 MiB -> 88.07 MiB\n", "[ 2/ 363] blk.0.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 3/ 363] blk.0.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 4/ 363] blk.0.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 5/ 363] blk.0.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 6/ 363] blk.0.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 7/ 363] blk.0.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 8/ 363] blk.0.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 9/ 363] blk.0.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 10/ 363] blk.0.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 11/ 363] blk.1.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 12/ 363] blk.1.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 13/ 363] blk.1.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 14/ 363] blk.1.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 15/ 363] blk.1.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 16/ 363] blk.1.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 17/ 363] blk.1.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 18/ 363] blk.1.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 19/ 363] blk.1.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 20/ 363] blk.2.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 21/ 363] blk.2.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 22/ 363] blk.2.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 23/ 363] blk.2.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 24/ 363] blk.2.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 25/ 363] blk.2.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 26/ 363] blk.2.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 27/ 363] blk.2.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 28/ 363] blk.2.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 29/ 363] blk.3.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 30/ 363] blk.3.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 31/ 363] blk.3.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 32/ 363] blk.3.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 33/ 363] blk.3.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 34/ 363] blk.3.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 35/ 363] blk.3.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 36/ 363] blk.3.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 37/ 363] blk.3.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 38/ 363] blk.4.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 39/ 363] blk.4.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 40/ 363] blk.4.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 41/ 363] blk.4.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 42/ 363] blk.4.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 43/ 363] blk.4.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 44/ 363] blk.4.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 45/ 363] blk.4.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 46/ 363] blk.4.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 47/ 363] blk.5.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 48/ 363] blk.5.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 49/ 363] blk.5.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 50/ 363] blk.5.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 51/ 363] blk.5.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 52/ 363] blk.5.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 53/ 363] blk.5.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 54/ 363] blk.5.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 55/ 363] blk.5.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 56/ 363] blk.6.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 57/ 363] blk.6.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 58/ 363] blk.6.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 59/ 363] blk.6.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 60/ 363] blk.6.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 61/ 363] blk.6.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 62/ 363] blk.10.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 63/ 363] blk.10.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 64/ 363] blk.10.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 65/ 363] blk.10.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 66/ 363] blk.10.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 67/ 363] blk.10.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 68/ 363] blk.10.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 69/ 363] blk.10.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 70/ 363] blk.10.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 71/ 363] blk.11.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 72/ 363] blk.11.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 73/ 363] blk.11.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 74/ 363] blk.11.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 75/ 363] blk.11.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 76/ 363] blk.11.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 77/ 363] blk.11.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 78/ 363] blk.11.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 79/ 363] blk.11.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 80/ 363] blk.12.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 81/ 363] blk.12.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 82/ 363] blk.12.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 83/ 363] blk.12.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 84/ 363] blk.12.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 85/ 363] blk.12.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 86/ 363] blk.12.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 87/ 363] blk.12.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 88/ 363] blk.12.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 89/ 363] blk.13.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 90/ 363] blk.13.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 91/ 363] blk.13.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 92/ 363] blk.13.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 93/ 363] blk.13.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 94/ 363] blk.13.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 95/ 363] blk.13.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 96/ 363] blk.13.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 97/ 363] blk.13.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 98/ 363] blk.6.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 99/ 363] blk.6.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 100/ 363] blk.6.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 101/ 363] blk.7.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 102/ 363] blk.7.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 103/ 363] blk.7.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 104/ 363] blk.7.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 105/ 363] blk.7.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 106/ 363] blk.7.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 107/ 363] blk.7.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 108/ 363] blk.7.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 109/ 363] blk.7.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 110/ 363] blk.8.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 111/ 363] blk.8.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 112/ 363] blk.8.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 113/ 363] blk.8.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 114/ 363] blk.8.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 115/ 363] blk.8.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 116/ 363] blk.8.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 117/ 363] blk.8.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 118/ 363] blk.8.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 119/ 363] blk.9.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 120/ 363] blk.9.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 121/ 363] blk.9.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 122/ 363] blk.9.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 123/ 363] blk.9.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 124/ 363] blk.9.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 125/ 363] blk.9.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 126/ 363] blk.9.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 127/ 363] blk.9.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 128/ 363] blk.14.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 129/ 363] blk.14.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 130/ 363] blk.14.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 131/ 363] blk.14.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 132/ 363] blk.14.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 133/ 363] blk.14.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 134/ 363] blk.14.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 135/ 363] blk.14.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 136/ 363] blk.14.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 137/ 363] blk.15.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 138/ 363] blk.15.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 139/ 363] blk.15.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 140/ 363] blk.15.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 141/ 363] blk.15.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 142/ 363] blk.15.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 143/ 363] blk.15.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 144/ 363] blk.15.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 145/ 363] blk.15.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 146/ 363] blk.16.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 147/ 363] blk.16.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 148/ 363] blk.16.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 149/ 363] blk.16.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 150/ 363] blk.16.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 151/ 363] blk.16.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 152/ 363] blk.16.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 153/ 363] blk.16.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 154/ 363] blk.16.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 155/ 363] blk.17.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 156/ 363] blk.17.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 157/ 363] blk.17.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 158/ 363] blk.17.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 159/ 363] blk.17.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 160/ 363] blk.17.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 161/ 363] blk.17.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 162/ 363] blk.17.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 163/ 363] blk.17.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 164/ 363] blk.18.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 165/ 363] blk.18.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 166/ 363] blk.18.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 167/ 363] blk.18.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 168/ 363] blk.18.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 169/ 363] blk.18.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 170/ 363] blk.18.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 171/ 363] blk.18.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 172/ 363] blk.18.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 173/ 363] blk.19.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 174/ 363] blk.19.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 175/ 363] blk.19.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 176/ 363] blk.19.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 177/ 363] blk.19.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 178/ 363] blk.19.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 179/ 363] blk.19.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 180/ 363] blk.19.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 181/ 363] blk.19.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 182/ 363] blk.20.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 183/ 363] blk.20.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 184/ 363] blk.20.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 185/ 363] blk.20.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 186/ 363] blk.20.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 187/ 363] blk.20.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 188/ 363] blk.20.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 189/ 363] blk.20.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 190/ 363] blk.20.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 191/ 363] blk.21.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 192/ 363] blk.21.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 193/ 363] blk.21.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 194/ 363] blk.21.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 195/ 363] blk.21.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 196/ 363] blk.21.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 197/ 363] blk.21.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 198/ 363] blk.21.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 199/ 363] blk.21.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 200/ 363] blk.22.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 201/ 363] blk.22.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 202/ 363] blk.22.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 203/ 363] blk.22.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 204/ 363] blk.22.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 205/ 363] blk.22.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 206/ 363] blk.22.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 207/ 363] blk.22.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 208/ 363] blk.22.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 209/ 363] blk.23.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 210/ 363] blk.23.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 211/ 363] blk.23.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 212/ 363] blk.23.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 213/ 363] blk.23.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 214/ 363] blk.23.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 215/ 363] blk.23.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 216/ 363] blk.23.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 217/ 363] blk.23.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 218/ 363] blk.24.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 219/ 363] blk.24.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 220/ 363] blk.24.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 221/ 363] blk.24.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 222/ 363] blk.24.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 223/ 363] blk.24.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 224/ 363] blk.24.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 225/ 363] blk.24.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 226/ 363] blk.24.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 227/ 363] blk.25.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 228/ 363] blk.25.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 229/ 363] blk.25.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 230/ 363] blk.25.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 231/ 363] blk.25.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 232/ 363] blk.25.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 233/ 363] blk.25.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 234/ 363] blk.25.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 235/ 363] blk.25.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 236/ 363] blk.26.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 237/ 363] blk.26.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 238/ 363] blk.26.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 239/ 363] blk.26.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 240/ 363] blk.26.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 241/ 363] blk.26.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 242/ 363] blk.26.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 243/ 363] blk.26.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 244/ 363] blk.26.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 245/ 363] blk.27.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 246/ 363] blk.27.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 247/ 363] blk.27.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 248/ 363] blk.27.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 249/ 363] blk.27.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 250/ 363] blk.27.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 251/ 363] blk.27.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 252/ 363] blk.27.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 253/ 363] blk.27.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 254/ 363] blk.28.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 255/ 363] blk.28.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 256/ 363] blk.28.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 257/ 363] blk.28.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 258/ 363] blk.28.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 259/ 363] blk.28.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 260/ 363] blk.28.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 261/ 363] blk.28.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 262/ 363] blk.28.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 263/ 363] blk.29.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 264/ 363] blk.29.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 265/ 363] blk.29.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 266/ 363] blk.29.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 267/ 363] blk.29.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 268/ 363] blk.29.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 269/ 363] blk.29.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 270/ 363] blk.29.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 271/ 363] blk.29.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 272/ 363] blk.30.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 273/ 363] blk.30.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 274/ 363] blk.30.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 275/ 363] blk.30.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 276/ 363] blk.30.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 277/ 363] blk.30.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 278/ 363] blk.30.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 279/ 363] blk.30.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 280/ 363] blk.30.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 281/ 363] blk.31.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 282/ 363] blk.31.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 283/ 363] blk.31.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 284/ 363] blk.31.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 285/ 363] blk.31.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 286/ 363] blk.31.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 287/ 363] blk.31.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 288/ 363] blk.31.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 289/ 363] blk.31.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 290/ 363] blk.32.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 291/ 363] blk.32.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 292/ 363] blk.32.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 293/ 363] blk.32.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 294/ 363] blk.32.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 295/ 363] blk.32.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 296/ 363] blk.32.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 297/ 363] blk.32.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 298/ 363] blk.32.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 299/ 363] blk.33.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 300/ 363] blk.33.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 301/ 363] blk.33.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 302/ 363] blk.33.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 303/ 363] blk.33.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 304/ 363] blk.33.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 305/ 363] blk.33.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 306/ 363] blk.33.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 307/ 363] blk.33.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 308/ 363] blk.34.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 309/ 363] blk.34.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 310/ 363] blk.34.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 311/ 363] blk.34.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 312/ 363] blk.34.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 313/ 363] blk.34.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 314/ 363] blk.34.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 315/ 363] blk.34.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 316/ 363] blk.34.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 317/ 363] blk.35.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 318/ 363] blk.35.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 319/ 363] blk.35.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 320/ 363] blk.35.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 321/ 363] blk.35.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 322/ 363] blk.35.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 323/ 363] output.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q6_K .. size = 313.12 MiB -> 128.43 MiB\n", "[ 324/ 363] blk.35.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 325/ 363] blk.35.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 326/ 363] blk.35.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 327/ 363] blk.36.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 328/ 363] blk.36.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 329/ 363] blk.36.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 330/ 363] blk.36.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 331/ 363] blk.36.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 332/ 363] blk.36.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 333/ 363] blk.36.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 334/ 363] blk.36.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 335/ 363] blk.36.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 336/ 363] blk.37.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 337/ 363] blk.37.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 338/ 363] blk.37.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 339/ 363] blk.37.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 340/ 363] blk.37.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 341/ 363] blk.37.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 342/ 363] blk.37.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 343/ 363] blk.37.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 344/ 363] blk.37.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 345/ 363] blk.38.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 346/ 363] blk.38.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 347/ 363] blk.38.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 348/ 363] blk.38.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 349/ 363] blk.38.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 350/ 363] blk.38.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 351/ 363] blk.38.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 352/ 363] blk.38.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 353/ 363] blk.38.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 354/ 363] blk.39.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 355/ 363] blk.39.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 356/ 363] blk.39.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 357/ 363] blk.39.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 358/ 363] blk.39.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 359/ 363] blk.39.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 360/ 363] blk.39.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 361/ 363] blk.39.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 362/ 363] blk.39.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 363/ 363] output_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "llama_model_quantize_internal: model size = 26627.83 MB\n", "llama_model_quantize_internal: quant size = 8013.98 MB\n", "\n", "main: quantize time = 403335.17 ms\n", "main: total time = 403335.17 ms\n", "Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.Q4_K_M.gguf\n", "Unsloth: Saved Ollama Modelfile to arbinMichael/phimed/Modelfile\n", "Unsloth: Uploading GGUF to Huggingface Hub...\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model\u001b[38;5;241m.\u001b[39mpush_to_hub_gguf(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marbinMichael/phimed\u001b[39m\u001b[38;5;124m\"\u001b[39m, tokenizer, quantization_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mq4_k_m\u001b[39m\u001b[38;5;124m\"\u001b[39m, token \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_********REDACTED********\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File 
\u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1830\u001b[0m, in \u001b[0;36munsloth_push_to_hub_gguf\u001b[0;34m(self, repo_id, tokenizer, quantization_method, first_conversion, use_temp_dir, commit_message, private, token, max_shard_size, create_pr, safe_serialization, revision, commit_description, tags, temporary_location, maximum_memory_usage)\u001b[0m\n\u001b[1;32m 1828\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_location \u001b[38;5;129;01min\u001b[39;00m all_file_locations:\n\u001b[1;32m 1829\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth: Uploading GGUF to Huggingface Hub...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1830\u001b[0m username \u001b[38;5;241m=\u001b[39m upload_to_huggingface(\n\u001b[1;32m 1831\u001b[0m \u001b[38;5;28mself\u001b[39m, repo_id, token,\n\u001b[1;32m 1832\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGGUF converted\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgguf\u001b[39m\u001b[38;5;124m\"\u001b[39m, file_location, old_username, private,\n\u001b[1;32m 1833\u001b[0m )\n\u001b[1;32m 1834\u001b[0m link \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00musername\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnew_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \\\n\u001b[1;32m 1835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m username \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m new_save_directory \u001b[38;5;28;01melse\u001b[39;00m \\\n\u001b[1;32m 1836\u001b[0m new_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 1838\u001b[0m 
\u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSaved GGUF to https://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlink\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1416\u001b[0m, in \u001b[0;36mupload_to_huggingface\u001b[0;34m(model, save_directory, token, method, extra, file_location, old_username, private, create_config)\u001b[0m\n\u001b[1;32m 1413\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 1414\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m-> 1416\u001b[0m hf_api\u001b[38;5;241m.\u001b[39mupload_file(\n\u001b[1;32m 1417\u001b[0m path_or_fileobj \u001b[38;5;241m=\u001b[39m file_location,\n\u001b[1;32m 1418\u001b[0m path_in_repo \u001b[38;5;241m=\u001b[39m uploaded_location,\n\u001b[1;32m 1419\u001b[0m repo_id \u001b[38;5;241m=\u001b[39m save_directory,\n\u001b[1;32m 1420\u001b[0m repo_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1421\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(Trained with Unsloth)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1422\u001b[0m )\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;66;03m# We also upload a config.json file\u001b[39;00m\n\u001b[1;32m 1425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m create_config:\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, 
has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/hf_api.py:1398\u001b[0m, in \u001b[0;36mfuture_compatible.._inner\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_as_future(fn, \u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1397\u001b[0m \u001b[38;5;66;03m# Otherwise, call the function normally\u001b[39;00m\n\u001b[0;32m-> 1398\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/hf_api.py:4502\u001b[0m, in \u001b[0;36mHfApi.upload_file\u001b[0;34m(self, path_or_fileobj, path_in_repo, repo_id, token, repo_type, revision, commit_message, commit_description, create_pr, parent_commit, run_as_future)\u001b[0m\n\u001b[1;32m 4497\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid repo type, must be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mREPO_TYPES\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4499\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 4500\u001b[0m commit_message \u001b[38;5;28;01mif\u001b[39;00m commit_message \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUpload \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_in_repo\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with huggingface_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4501\u001b[0m )\n\u001b[0;32m-> 4502\u001b[0m operation \u001b[38;5;241m=\u001b[39m CommitOperationAdd(\n\u001b[1;32m 4503\u001b[0m path_or_fileobj\u001b[38;5;241m=\u001b[39mpath_or_fileobj,\n\u001b[1;32m 4504\u001b[0m path_in_repo\u001b[38;5;241m=\u001b[39mpath_in_repo,\n\u001b[1;32m 4505\u001b[0m )\n\u001b[1;32m 4507\u001b[0m commit_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcreate_commit(\n\u001b[1;32m 4508\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[1;32m 4509\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4516\u001b[0m parent_commit\u001b[38;5;241m=\u001b[39mparent_commit,\n\u001b[1;32m 4517\u001b[0m )\n\u001b[1;32m 4519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m commit_info\u001b[38;5;241m.\u001b[39mpr_url \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m:5\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, path_in_repo, path_or_fileobj)\u001b[0m\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/_commit_api.py:189\u001b[0m, in \u001b[0;36mCommitOperationAdd.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[38;5;66;03m# Compute \"upload_info\" attribute\u001b[39;00m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupload_info \u001b[38;5;241m=\u001b[39m 
UploadInfo\u001b[38;5;241m.\u001b[39mfrom_path(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj)\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj, \u001b[38;5;28mbytes\u001b[39m):\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupload_info \u001b[38;5;241m=\u001b[39m UploadInfo\u001b[38;5;241m.\u001b[39mfrom_bytes(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/lfs.py:84\u001b[0m, in \u001b[0;36mUploadInfo.from_path\u001b[0;34m(cls, path)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m io\u001b[38;5;241m.\u001b[39mopen(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 83\u001b[0m sample \u001b[38;5;241m=\u001b[39m file\u001b[38;5;241m.\u001b[39mpeek(\u001b[38;5;241m512\u001b[39m)[:\u001b[38;5;241m512\u001b[39m]\n\u001b[0;32m---> 84\u001b[0m sha \u001b[38;5;241m=\u001b[39m sha_fileobj(file)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(size\u001b[38;5;241m=\u001b[39msize, sha256\u001b[38;5;241m=\u001b[39msha, sample\u001b[38;5;241m=\u001b[39msample)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/utils/sha.py:25\u001b[0m, in \u001b[0;36msha_fileobj\u001b[0;34m(fileobj, chunk_size)\u001b[0m\n\u001b[1;32m 23\u001b[0m sha \u001b[38;5;241m=\u001b[39m sha256()\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m chunk \u001b[38;5;241m=\u001b[39m fileobj\u001b[38;5;241m.\u001b[39mread(chunk_size)\n\u001b[1;32m 26\u001b[0m sha\u001b[38;5;241m.\u001b[39mupdate(chunk)\n\u001b[1;32m 27\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m chunk:\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "model.push_to_hub_gguf(\"arbinMichael/phimed\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "ca03f0a5-5d01-4a92-ac3d-6b53f4d8f6e2", "metadata": {}, "outputs": [], "source": [ "model.save_pretrained(\"lora_model_new\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "2e42b81e-e794-44f2-9002-358d8f8837f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('lora_model_new/tokenizer_config.json',\n", " 'lora_model_new/special_tokens_map.json',\n", " 'lora_model_new/tokenizer.model',\n", " 'lora_model_new/added_tokens.json',\n", " 'lora_model_new/tokenizer.json')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.save_pretrained(\"lora_model_new\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "70239d84-fd1e-4385-a9f6-39bbbcee8078", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "21\n" ] } ], "source": [ "um = round(torch.cuda.max_memory_reserved()/1024/1024/1024,)\n", "print(um)" ] }, { "cell_type": "code", "execution_count": 9, "id": "33d4d882-2eec-4c12-957b-2f96150a2cc8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak reserved memory = 9.447 GB.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "print(f\"Peak reserved memory = {used_memory} GB.\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8c2d42e5-df94-49db-acfc-71989060825a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }