{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "60299a7f-6e86-4bd6-9dbf-250b42a264b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "==((====))== Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.\n", " \\\\ /| GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.6. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.27. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n" ] } ], "source": [ "# Load the Phi-3.5-mini-instruct base model in 4-bit (QLoRA-ready) via Unsloth.\n", "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/Phi-3.5-mini-instruct\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "8712c5c8-c763-4743-bc8d-54b879433b73", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Already have LoRA adapters! We shall skip this step.\n" ] } ], "source": [ "# Attach LoRA adapters (rank 32) to the attention and MLP projection layers.\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 32, # Choose any number > 0 ! 
Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 32,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 3407,\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "c9d36fef-4c62-412d-81a8-2769a1b56042", "metadata": {}, "outputs": [], "source": [ "# Load the training split; each example carries a pre-formatted phi-3 style 'text' field (shown in the next cell).\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"arbinMichael/testparquet\", split = \"train\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "452ad49e-b283-4655-9c99-f30c5eed681c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|user|>Perpare a schedule for current charge/discharge test, the value of the current is a linear variable, using Current Ramp(A) control type. The charging current start value is 0.5A, the rate of change of the current per second is 0.01, up to 4V then ; discharge current start value is -0.5A, the rate of change of the current per second is -0.01, discharging to 1V then end the test. 
Record one point per second<|end|><|assistant|>[{\"StepCtrlTypeString\":\"Rest\",\"CtrlValue\":\"\",\"Label\":\"Step_A\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=5\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"0.5\",\"Label\":\"Step_B\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage>=4\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Current Ramp(A)\",\"CtrlValue\":\"-0.5\",\"Label\":\"Step_C\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Voltage<=1\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"DV_Time>=1\",\"GotoStep\":\"Next Step\"}]}]<|end|>\n" ] } ], "source": [ "# Show one raw training example to verify the phi-3 chat formatting.\n", "print(dataset[5][\"text\"])" ] }, { "cell_type": "code", "execution_count": 6, "id": "b0a39d9e-e3bf-4fae-8d75-dba12ccf15c8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "# Supervised fine-tuning on the raw 'text' field; max_steps = 30 overrides any epoch count (see stderr below).\n", "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 2,\n", " max_steps = 30,\n", " learning_rate = 2e-4,\n", " fp16 = not is_bfloat16_supported(),\n", " bf16 = is_bfloat16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 7444,\n", " output_dir = \"outputs\",\n", " ),\n", ")" ] }, { "cell_type": 
"code", "execution_count": null, "id": "625e8b31-82d8-4930-a46e-a82803b4f211", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 10 | Num Epochs = 30\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 30\n", " \"-____-\" Number of trainable parameters = 59,768,832\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [15/30 06:56 < 08:00, 0.03 it/s, Epoch 11.20/30]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
11.464500
21.716400
31.310200
41.266100
51.434100
61.185100
71.001900
81.157100
90.660400
100.998400
110.756400
120.931600
130.519000

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": 13, "id": "c407b9c0-aa4c-412a-b7cc-ddbdbb6a5212", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\"StepCtrlTypeString\":\"Rest\",\"CtrlValue\":\"\",\"Label\":\"Step_A\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=10\",\"GotoStep\":\"Next Step\"}],\"LogLimits\":[{\"Equations\":\"PV_CHAN_Time>=60\",\"GotoStep\":\"Next Step\"}]},{\"StepCtrlTypeString\":\"Internal Resistance\",\"CtrlValue\":\"0.5\",\"Label\":\"Step_B\",\"StepLimits\":[{\"Equations\":\"PV_CHAN_Step_Time>=3\",\"Goto\n" ] } ], "source": [ "# Inference demo: re-apply the phi-3 chat template (mapping translates ShareGPT-style keys), then stream a generation on GPU.\n", "from unsloth.chat_templates import get_chat_template\n", "\n", "tokenizer = get_chat_template(\n", " tokenizer,\n", " chat_template = \"phi-3\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n", " mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n", ")\n", "\n", "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", "\n", "messages = [\n", " {\"from\": \"human\", \"value\": \"give me schedule that first step will log every 10 second and will last 60seconds, and second step will log every 3 second and will last 40 seconds\"},\n", "]\n", "inputs = tokenizer.apply_chat_template(\n", " messages,\n", " tokenize = True,\n", " add_generation_prompt = True, # Must add for generation\n", " return_tensors = \"pt\",\n", ").to(\"cuda\")\n", "\n", "# NOTE(review): the printed schedule above stops mid-JSON - generation appears cut off at max_new_tokens = 128; raise it for complete schedules.\n", "from transformers import TextStreamer\n", "text_streamer = TextStreamer(tokenizer, skip_prompt = True)\n", "_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)" ] }, { "cell_type": "code", "execution_count": 8, "id": "069d4087-35c2-4d2e-b981-f5bc65bac44d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 15.71 out of 28.5 RAM for saving.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████| 32/32 [00:06<00:00, 4.69it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... Done.\n", "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n", "Done.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Converting llama model. Can use fast conversion = True.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: [0] Installing llama.cpp. 
This will take 3 minutes...\n", "Unsloth: [1] Converting model at testmodel into bf16 GGUF format.\n", "The output location will be ./testmodel/unsloth.BF16.gguf\n", "This will take 3 minutes...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Extending testmodel/tokenizer.model with added_tokens.json.\n", "Originally tokenizer.model is of size (32000).\n", "But we need to extend to sentencepiece vocab size (32011).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:hf-to-gguf:Loading model: testmodel\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00002.safetensors'\n", "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {3072, 32064}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", 
"INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 
8192}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, 
torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> 
F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00002-of-00002.safetensors'\n", 
"INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {3072, 32064}\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, 
torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = 
{3072}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", 
"INFO:hf-to-gguf:blk.28.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", 
"INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.bfloat16 --> BF16, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.bfloat16 --> BF16, shape = {3072, 3072}\n", "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 131072\n", "INFO:hf-to-gguf:gguf: embedding length = 3072\n", "INFO:hf-to-gguf:gguf: feed forward length = 8192\n", "INFO:hf-to-gguf:gguf: head count = 32\n", "INFO:hf-to-gguf:gguf: key-value head count = 32\n", "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 32\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:gguf.vocab:Setting special token type bos to 1\n", "INFO:gguf.vocab:Setting special token type eos to 32000\n", "INFO:gguf.vocab:Setting special token type unk to 0\n", "INFO:gguf.vocab:Setting special token type pad to 32009\n", "INFO:gguf.vocab:Setting add_bos_token to False\n", "INFO:gguf.vocab:Setting add_eos_token to False\n", "INFO:gguf.vocab:Setting chat_template to {% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n", "' + message['content'] + '<|end|>\n", "'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n", "' + 
message['content'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['role'] + '|>\n", "' + message['content'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|user|>\n", "' + message['value'] + '<|end|>\n", "'}}{% elif message['from'] == 'gpt' %}{{'<|assistant|>\n", "' + message['value'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['from'] + '|>\n", "' + message['value'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% endif %}\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:testmodel/unsloth.BF16.gguf: n_tensors = 291, total_size = 7.6G\n", "Writing: 100%|██████████| 7.64G/7.64G [00:54<00:00, 140Mbyte/s]\n", "INFO:hf-to-gguf:Model successfully exported to testmodel/unsloth.BF16.gguf\n", "Unsloth: Conversion completed! Output location: ./testmodel/unsloth.BF16.gguf\n", "Unsloth: [2] Converting GGUF 16bit into q4_k_m. This will take 20 minutes...\n", "main: build = 3651 (8f1d81a0)\n", "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n", "main: quantizing './testmodel/unsloth.BF16.gguf' to './testmodel/unsloth.Q4_K_M.gguf' as Q4_K_M using 24 threads\n", "llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from ./testmodel/unsloth.BF16.gguf (version GGUF V3 (latest))\n", "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = llama\n", "llama_model_loader: - kv 1: general.type str = model\n", "llama_model_loader: - kv 2: general.name str = Phi 3.5 Mini Instruct Bnb 4bit\n", "llama_model_loader: - kv 3: general.organization str = Unsloth\n", "llama_model_loader: - kv 4: general.finetune str = instruct-bnb-4bit\n", "llama_model_loader: - kv 5: general.basename str = phi-3.5\n", "llama_model_loader: - kv 6: general.size_label str = mini\n", "llama_model_loader: - kv 7: llama.block_count u32 = 32\n", "llama_model_loader: - kv 8: llama.context_length u32 = 131072\n", "llama_model_loader: - kv 9: llama.embedding_length u32 = 3072\n", "llama_model_loader: - kv 10: llama.feed_forward_length u32 = 8192\n", "llama_model_loader: - kv 11: llama.attention.head_count u32 = 32\n", "llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 32\n", "llama_model_loader: - kv 13: llama.rope.freq_base f32 = 10000.000000\n", "llama_model_loader: - kv 14: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", "llama_model_loader: - kv 15: general.file_type u32 = 32\n", "llama_model_loader: - kv 16: llama.vocab_size u32 = 32064\n", "llama_model_loader: - kv 17: llama.rope.dimension_count u32 = 96\n", "llama_model_loader: - kv 18: tokenizer.ggml.add_space_prefix bool = false\n", "llama_model_loader: - kv 19: tokenizer.ggml.model str = llama\n", "llama_model_loader: - kv 20: tokenizer.ggml.pre str = default\n", "llama_model_loader: - kv 21: tokenizer.ggml.tokens arr[str,32064] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", "llama_model_loader: - kv 22: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n", "llama_model_loader: - kv 23: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", "llama_model_loader: - kv 24: tokenizer.ggml.bos_token_id u32 = 1\n", "llama_model_loader: - kv 25: tokenizer.ggml.eos_token_id u32 = 32000\n", 
"llama_model_loader: - kv 26: tokenizer.ggml.unknown_token_id u32 = 0\n", "llama_model_loader: - kv 27: tokenizer.ggml.padding_token_id u32 = 32009\n", "llama_model_loader: - kv 28: tokenizer.ggml.add_bos_token bool = false\n", "llama_model_loader: - kv 29: tokenizer.ggml.add_eos_token bool = false\n", "llama_model_loader: - kv 30: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n", "llama_model_loader: - kv 31: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 65 tensors\n", "llama_model_loader: - type bf16: 226 tensors\n", "[ 1/ 291] token_embd.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q4_K .. size = 187.88 MiB -> 52.84 MiB\n", "[ 2/ 291] blk.0.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 3/ 291] blk.0.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 4/ 291] blk.0.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 5/ 291] blk.0.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 6/ 291] blk.0.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 7/ 291] blk.0.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 8/ 291] blk.0.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 9/ 291] blk.0.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 10/ 291] blk.0.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 11/ 291] blk.1.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 12/ 291] blk.1.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 48.00 MiB -> 19.69 MiB\n", "[ 13/ 291] blk.1.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 14/ 291] blk.1.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 15/ 291] blk.1.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 16/ 291] blk.1.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 17/ 291] blk.1.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 18/ 291] blk.1.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 19/ 291] blk.1.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 20/ 291] blk.10.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 21/ 291] blk.10.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 22/ 291] blk.10.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 23/ 291] blk.10.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 24/ 291] blk.10.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 25/ 291] blk.10.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 26/ 291] blk.10.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 27/ 291] blk.10.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 28/ 291] blk.10.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 29/ 291] blk.11.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 30/ 291] blk.11.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 31/ 291] blk.11.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 32/ 291] blk.11.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 33/ 291] blk.11.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 34/ 291] blk.11.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 35/ 291] blk.11.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 36/ 291] blk.11.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 37/ 291] blk.11.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 38/ 291] blk.12.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 39/ 291] blk.12.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 40/ 291] blk.12.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 41/ 291] blk.12.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 42/ 291] blk.12.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 43/ 291] blk.12.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 44/ 291] blk.12.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 45/ 291] blk.12.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 46/ 291] blk.12.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 47/ 291] blk.13.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 48/ 291] blk.13.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 49/ 291] blk.13.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 50/ 291] blk.13.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 51/ 291] blk.13.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 52/ 291] blk.13.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 53/ 291] blk.13.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 54/ 291] blk.13.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 55/ 291] blk.13.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 56/ 291] blk.14.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 57/ 291] blk.14.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 58/ 291] blk.14.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 59/ 291] blk.14.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 60/ 291] blk.14.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 61/ 291] blk.14.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 62/ 291] blk.14.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 63/ 291] blk.14.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 64/ 291] blk.14.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 65/ 291] blk.15.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 66/ 291] blk.15.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 67/ 291] blk.15.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 68/ 291] blk.15.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 69/ 291] blk.15.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 70/ 291] blk.15.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 71/ 291] blk.15.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 72/ 291] blk.15.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 73/ 291] blk.15.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 74/ 291] blk.16.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 75/ 291] blk.16.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 76/ 291] blk.16.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 77/ 291] blk.16.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 78/ 291] blk.16.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 79/ 291] blk.16.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 80/ 291] blk.16.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 81/ 291] blk.16.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 82/ 291] blk.16.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 83/ 291] blk.17.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 84/ 291] blk.17.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 85/ 291] blk.17.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 86/ 291] blk.17.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 87/ 291] blk.17.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 88/ 291] blk.17.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 89/ 291] blk.17.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 90/ 291] blk.17.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 91/ 291] blk.17.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 92/ 291] blk.18.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 93/ 291] blk.18.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 94/ 291] blk.18.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 95/ 291] blk.18.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 96/ 291] blk.18.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 97/ 291] blk.18.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 98/ 291] blk.18.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 99/ 291] blk.18.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 100/ 291] blk.18.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 101/ 291] blk.19.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 102/ 291] blk.19.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 103/ 291] blk.19.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 104/ 291] blk.19.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 105/ 291] blk.19.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 106/ 291] blk.19.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 107/ 291] blk.19.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 108/ 291] blk.19.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 109/ 291] blk.19.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 110/ 291] blk.2.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 111/ 291] blk.2.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 112/ 291] blk.2.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 113/ 291] blk.2.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 114/ 291] blk.2.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 115/ 291] blk.2.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 116/ 291] blk.2.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 117/ 291] blk.2.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 118/ 291] blk.2.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 119/ 291] blk.20.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 120/ 291] blk.20.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 121/ 291] blk.20.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 122/ 291] blk.20.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 123/ 291] blk.20.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 124/ 291] blk.20.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 125/ 291] blk.20.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 126/ 291] blk.20.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 127/ 291] blk.20.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 128/ 291] blk.21.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 129/ 291] blk.21.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 130/ 291] blk.3.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 131/ 291] blk.3.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 132/ 291] blk.3.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 133/ 291] blk.3.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 134/ 291] blk.3.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 135/ 291] blk.3.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 136/ 291] blk.3.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 137/ 291] blk.3.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 138/ 291] blk.3.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 139/ 291] blk.4.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 140/ 291] blk.4.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 141/ 291] blk.4.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 142/ 291] blk.4.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 143/ 291] blk.4.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 144/ 291] blk.4.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 145/ 291] blk.4.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 146/ 291] blk.4.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 147/ 291] blk.4.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 148/ 291] blk.5.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 149/ 291] blk.5.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 150/ 291] blk.5.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 151/ 291] blk.5.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 152/ 291] blk.5.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 153/ 291] blk.5.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 154/ 291] blk.5.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 155/ 291] blk.5.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 156/ 291] blk.5.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 157/ 291] blk.6.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 158/ 291] blk.6.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 159/ 291] blk.6.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 160/ 291] blk.6.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 161/ 291] blk.6.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 162/ 291] blk.6.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 163/ 291] blk.6.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 164/ 291] blk.6.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 165/ 291] blk.6.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 166/ 291] blk.7.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 167/ 291] blk.7.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 168/ 291] blk.7.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 169/ 291] blk.7.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 170/ 291] blk.7.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 171/ 291] blk.7.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 172/ 291] blk.7.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 173/ 291] blk.7.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 174/ 291] blk.7.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 175/ 291] blk.8.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 176/ 291] blk.8.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 177/ 291] blk.8.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 178/ 291] blk.8.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 179/ 291] blk.8.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 180/ 291] blk.8.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 181/ 291] blk.8.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 182/ 291] blk.8.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 183/ 291] blk.8.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 184/ 291] blk.9.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 185/ 291] blk.9.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 186/ 291] blk.9.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 187/ 291] blk.9.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 188/ 291] blk.9.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 189/ 291] blk.9.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 190/ 291] blk.9.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 191/ 291] blk.9.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 192/ 291] blk.9.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 193/ 291] output.weight - [ 3072, 32064, 1, 1], type = bf16, converting to q6_K .. size = 187.88 MiB -> 77.06 MiB\n", "[ 194/ 291] blk.21.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 195/ 291] blk.21.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 48.00 MiB -> 19.69 MiB\n", "[ 196/ 291] blk.21.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 197/ 291] blk.21.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 198/ 291] blk.21.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 199/ 291] blk.21.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 200/ 291] blk.21.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 201/ 291] blk.22.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 202/ 291] blk.22.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 203/ 291] blk.22.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 204/ 291] blk.22.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 205/ 291] blk.22.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 206/ 291] blk.22.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 207/ 291] blk.22.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 208/ 291] blk.22.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 209/ 291] blk.22.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 210/ 291] blk.23.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 211/ 291] blk.23.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 212/ 291] blk.23.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 213/ 291] blk.23.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 214/ 291] blk.23.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 215/ 291] blk.23.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 216/ 291] blk.23.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 217/ 291] blk.23.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 218/ 291] blk.23.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 219/ 291] blk.24.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 220/ 291] blk.24.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 221/ 291] blk.24.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 222/ 291] blk.24.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 223/ 291] blk.24.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 224/ 291] blk.24.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 225/ 291] blk.24.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 226/ 291] blk.24.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 227/ 291] blk.24.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 228/ 291] blk.25.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 229/ 291] blk.25.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 230/ 291] blk.25.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 231/ 291] blk.25.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 232/ 291] blk.25.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 233/ 291] blk.25.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 234/ 291] blk.25.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 235/ 291] blk.25.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 236/ 291] blk.25.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 237/ 291] blk.26.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 238/ 291] blk.26.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 239/ 291] blk.26.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 240/ 291] blk.26.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 241/ 291] blk.26.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 242/ 291] blk.26.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 243/ 291] blk.26.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 244/ 291] blk.26.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 245/ 291] blk.26.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 246/ 291] blk.27.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 247/ 291] blk.27.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 248/ 291] blk.27.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 249/ 291] blk.27.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 250/ 291] blk.27.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 251/ 291] blk.27.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 252/ 291] blk.27.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 253/ 291] blk.27.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 254/ 291] blk.27.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 255/ 291] blk.28.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 256/ 291] blk.28.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 257/ 291] blk.28.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 258/ 291] blk.28.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 259/ 291] blk.28.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 260/ 291] blk.28.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. 
size = 18.00 MiB -> 5.06 MiB\n", "[ 261/ 291] blk.28.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 262/ 291] blk.28.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 263/ 291] blk.28.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 264/ 291] blk.29.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 265/ 291] blk.29.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 266/ 291] blk.29.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 267/ 291] blk.29.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 268/ 291] blk.29.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 269/ 291] blk.29.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 270/ 291] blk.29.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 271/ 291] blk.29.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 272/ 291] blk.29.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 273/ 291] blk.30.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 274/ 291] blk.30.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 275/ 291] blk.30.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 276/ 291] blk.30.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. 
size = 48.00 MiB -> 13.50 MiB\n", "[ 277/ 291] blk.30.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 278/ 291] blk.30.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 279/ 291] blk.30.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 280/ 291] blk.30.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 281/ 291] blk.30.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. size = 18.00 MiB -> 7.38 MiB\n", "[ 282/ 291] blk.31.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 283/ 291] blk.31.ffn_down.weight - [ 8192, 3072, 1, 1], type = bf16, converting to q6_K .. size = 48.00 MiB -> 19.69 MiB\n", "[ 284/ 291] blk.31.ffn_gate.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 285/ 291] blk.31.ffn_up.weight - [ 3072, 8192, 1, 1], type = bf16, converting to q4_K .. size = 48.00 MiB -> 13.50 MiB\n", "[ 286/ 291] blk.31.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "[ 287/ 291] blk.31.attn_k.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 288/ 291] blk.31.attn_output.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 289/ 291] blk.31.attn_q.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q4_K .. size = 18.00 MiB -> 5.06 MiB\n", "[ 290/ 291] blk.31.attn_v.weight - [ 3072, 3072, 1, 1], type = bf16, converting to q6_K .. 
size = 18.00 MiB -> 7.38 MiB\n", "[ 291/ 291] output_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB\n", "llama_model_quantize_internal: model size = 7288.51 MB\n", "llama_model_quantize_internal: quant size = 2210.78 MB\n", "\n", "main: quantize time = 85490.82 ms\n", "main: total time = 85490.83 ms\n", "Unsloth: Conversion completed! Output location: ./testmodel/unsloth.Q4_K_M.gguf\n", "Unsloth: Saved Ollama Modelfile to testmodel/Modelfile\n" ] } ], "source": [ "model.save_pretrained_gguf(\"testmodel\", tokenizer, quantization_method = \"q4_k_m\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "de91a837-490d-468a-9d2e-08d24453dbfe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 15.36 out of 28.5 RAM for saving.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████| 40/40 [01:22<00:00, 2.07s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... Done.\n", "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n", "Done.\n", "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: [0] Installing llama.cpp. 
This will take 3 minutes...\n", "Unsloth: [1] Converting model at arbinMichael/phimed into bf16 GGUF format.\n", "The output location will be ./arbinMichael/phimed/unsloth.BF16.gguf\n", "This will take 3 minutes...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Extending arbinMichael/phimed/tokenizer.model with added_tokens.json.\n", "Originally tokenizer.model is of size (32000).\n", "But we need to extend to sentencepiece vocab size (32011).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:hf-to-gguf:Loading model: phimed\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00006.safetensors'\n", "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 
17920}\n", "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, 
torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00002-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, 
torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", 
"INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 
--> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00003-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", 
"INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", 
"INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00004-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, 
torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = 
{5120, 5120}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.28.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", 
"INFO:hf-to-gguf:blk.28.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:gguf: loading model part 'model-00005-of-00006.safetensors'\n", "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.30.attn_output.weight, 
torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.32.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.32.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.32.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.32.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.32.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.32.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.32.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.32.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.32.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.33.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.33.ffn_down.weight, torch.bfloat16 --> BF16, shape = 
{17920, 5120}\n", "INFO:hf-to-gguf:blk.33.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.33.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.33.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.33.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.33.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.33.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.33.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.34.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.34.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.34.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.34.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.34.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.34.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.34.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.34.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.34.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.35.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.35.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.35.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.35.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.35.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.35.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", 
"INFO:hf-to-gguf:gguf: loading model part 'model-00006-of-00006.safetensors'\n", "INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {5120, 32064}\n", "INFO:hf-to-gguf:blk.35.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.35.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.35.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.36.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.36.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.36.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.36.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.36.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.36.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.36.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.37.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.37.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.37.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.37.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.37.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.37.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.37.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.37.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.37.attn_v.weight, torch.bfloat16 
--> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.38.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.38.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.38.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.38.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.38.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.38.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.38.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.38.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.38.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.39.attn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.39.ffn_down.weight, torch.bfloat16 --> BF16, shape = {17920, 5120}\n", "INFO:hf-to-gguf:blk.39.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.39.ffn_up.weight, torch.bfloat16 --> BF16, shape = {5120, 17920}\n", "INFO:hf-to-gguf:blk.39.ffn_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:blk.39.attn_k.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:blk.39.attn_output.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.39.attn_q.weight, torch.bfloat16 --> BF16, shape = {5120, 5120}\n", "INFO:hf-to-gguf:blk.39.attn_v.weight, torch.bfloat16 --> BF16, shape = {5120, 1280}\n", "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {5120}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 4096\n", "INFO:hf-to-gguf:gguf: embedding length = 5120\n", "INFO:hf-to-gguf:gguf: feed forward length = 17920\n", "INFO:hf-to-gguf:gguf: head count = 40\n", 
"INFO:hf-to-gguf:gguf: key-value head count = 10\n", "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 32\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:gguf.vocab:Setting special token type bos to 1\n", "INFO:gguf.vocab:Setting special token type eos to 32000\n", "INFO:gguf.vocab:Setting special token type unk to 0\n", "INFO:gguf.vocab:Setting special token type pad to 32009\n", "INFO:gguf.vocab:Setting add_bos_token to False\n", "INFO:gguf.vocab:Setting add_eos_token to False\n", "INFO:gguf.vocab:Setting chat_template to {% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n", "' + message['content'] + '<|end|>\n", "'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n", "' + message['content'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['role'] + '|>\n", "' + message['content'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|user|>\n", "' + message['value'] + '<|end|>\n", "'}}{% elif message['from'] == 'gpt' %}{{'<|assistant|>\n", "' + message['value'] + '<|end|>\n", "'}}{% else %}{{'<|' + message['from'] + '|>\n", "' + message['value'] + '<|end|>\n", "'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n", "' }}{% endif %}{% endif %}\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:arbinMichael/phimed/unsloth.BF16.gguf: n_tensors = 363, total_size = 27.9G\n", "Writing: 100%|██████████| 27.9G/27.9G [04:26<00:00, 105Mbyte/s] \n", "INFO:hf-to-gguf:Model successfully exported to arbinMichael/phimed/unsloth.BF16.gguf\n", "Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.BF16.gguf\n", "Unsloth: [2] Converting GGUF 16bit into q4_k_m. 
This will take 20 minutes...\n", "main: build = 3651 (8f1d81a0)\n", "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n", "main: quantizing './arbinMichael/phimed/unsloth.BF16.gguf' to './arbinMichael/phimed/unsloth.Q4_K_M.gguf' as Q4_K_M using 24 threads\n", "llama_model_loader: loaded meta data with 34 key-value pairs and 363 tensors from ./arbinMichael/phimed/unsloth.BF16.gguf (version GGUF V3 (latest))\n", "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = llama\n", "llama_model_loader: - kv 1: general.type str = model\n", "llama_model_loader: - kv 2: general.name str = Phi 3 Medium 4k Instruct Bnb 4bit\n", "llama_model_loader: - kv 3: general.organization str = Unsloth\n", "llama_model_loader: - kv 4: general.finetune str = 4k-instruct-bnb-4bit\n", "llama_model_loader: - kv 5: general.basename str = phi-3\n", "llama_model_loader: - kv 6: general.size_label str = medium\n", "llama_model_loader: - kv 7: llama.block_count u32 = 40\n", "llama_model_loader: - kv 8: llama.context_length u32 = 4096\n", "llama_model_loader: - kv 9: llama.embedding_length u32 = 5120\n", "llama_model_loader: - kv 10: llama.feed_forward_length u32 = 17920\n", "llama_model_loader: - kv 11: llama.attention.head_count u32 = 40\n", "llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 10\n", "llama_model_loader: - kv 13: llama.rope.freq_base f32 = 10000.000000\n", "llama_model_loader: - kv 14: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", "llama_model_loader: - kv 15: llama.attention.key_length u32 = 128\n", "llama_model_loader: - kv 16: llama.attention.value_length u32 = 128\n", "llama_model_loader: - kv 17: general.file_type u32 = 32\n", "llama_model_loader: - kv 18: llama.vocab_size u32 = 32064\n", "llama_model_loader: - kv 19: llama.rope.dimension_count u32 = 128\n", "llama_model_loader: - kv 20: tokenizer.ggml.add_space_prefix 
bool = false\n", "llama_model_loader: - kv 21: tokenizer.ggml.model str = llama\n", "llama_model_loader: - kv 22: tokenizer.ggml.pre str = default\n", "llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,32064] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", "llama_model_loader: - kv 24: tokenizer.ggml.scores arr[f32,32064] = [-1000.000000, -1000.000000, -1000.00...\n", "llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,32064] = [3, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", "llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 1\n", "llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 32000\n", "llama_model_loader: - kv 28: tokenizer.ggml.unknown_token_id u32 = 0\n", "llama_model_loader: - kv 29: tokenizer.ggml.padding_token_id u32 = 32009\n", "llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false\n", "llama_model_loader: - kv 31: tokenizer.ggml.add_eos_token bool = false\n", "llama_model_loader: - kv 32: tokenizer.chat_template str = {% if 'role' in messages[0] %}{% for ...\n", "llama_model_loader: - kv 33: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 81 tensors\n", "llama_model_loader: - type bf16: 282 tensors\n", "[ 1/ 363] token_embd.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q4_K .. size = 313.12 MiB -> 88.07 MiB\n", "[ 2/ 363] blk.0.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 3/ 363] blk.0.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 4/ 363] blk.0.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 5/ 363] blk.0.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 6/ 363] blk.0.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 7/ 363] blk.0.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 8/ 363] blk.0.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 9/ 363] blk.0.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 10/ 363] blk.0.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 11/ 363] blk.1.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 12/ 363] blk.1.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 13/ 363] blk.1.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 14/ 363] blk.1.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 15/ 363] blk.1.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 16/ 363] blk.1.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 17/ 363] blk.1.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 18/ 363] blk.1.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 19/ 363] blk.1.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 20/ 363] blk.2.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 21/ 363] blk.2.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 22/ 363] blk.2.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 23/ 363] blk.2.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 24/ 363] blk.2.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 25/ 363] blk.2.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 26/ 363] blk.2.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 27/ 363] blk.2.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 28/ 363] blk.2.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 29/ 363] blk.3.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 30/ 363] blk.3.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 31/ 363] blk.3.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 32/ 363] blk.3.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 33/ 363] blk.3.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 34/ 363] blk.3.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 35/ 363] blk.3.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 36/ 363] blk.3.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 37/ 363] blk.3.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 38/ 363] blk.4.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 39/ 363] blk.4.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 40/ 363] blk.4.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 41/ 363] blk.4.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 42/ 363] blk.4.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 43/ 363] blk.4.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 44/ 363] blk.4.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 45/ 363] blk.4.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 46/ 363] blk.4.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 47/ 363] blk.5.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 48/ 363] blk.5.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 49/ 363] blk.5.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 50/ 363] blk.5.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 51/ 363] blk.5.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 52/ 363] blk.5.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 53/ 363] blk.5.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 54/ 363] blk.5.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 55/ 363] blk.5.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 56/ 363] blk.6.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 57/ 363] blk.6.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 58/ 363] blk.6.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 59/ 363] blk.6.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 60/ 363] blk.6.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 61/ 363] blk.6.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 62/ 363] blk.10.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 63/ 363] blk.10.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 64/ 363] blk.10.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 65/ 363] blk.10.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 66/ 363] blk.10.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 67/ 363] blk.10.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 68/ 363] blk.10.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 69/ 363] blk.10.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 70/ 363] blk.10.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 71/ 363] blk.11.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 72/ 363] blk.11.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 73/ 363] blk.11.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 74/ 363] blk.11.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 75/ 363] blk.11.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 76/ 363] blk.11.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 77/ 363] blk.11.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 78/ 363] blk.11.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 79/ 363] blk.11.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 80/ 363] blk.12.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 81/ 363] blk.12.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 82/ 363] blk.12.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 83/ 363] blk.12.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 84/ 363] blk.12.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 85/ 363] blk.12.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 86/ 363] blk.12.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 87/ 363] blk.12.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 88/ 363] blk.12.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 89/ 363] blk.13.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 90/ 363] blk.13.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 91/ 363] blk.13.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 92/ 363] blk.13.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 93/ 363] blk.13.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 94/ 363] blk.13.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 95/ 363] blk.13.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 96/ 363] blk.13.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 97/ 363] blk.13.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 98/ 363] blk.6.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 99/ 363] blk.6.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 100/ 363] blk.6.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 101/ 363] blk.7.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 102/ 363] blk.7.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 103/ 363] blk.7.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 104/ 363] blk.7.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 105/ 363] blk.7.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 106/ 363] blk.7.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 107/ 363] blk.7.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 108/ 363] blk.7.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 109/ 363] blk.7.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 110/ 363] blk.8.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 111/ 363] blk.8.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 112/ 363] blk.8.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 113/ 363] blk.8.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 114/ 363] blk.8.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 115/ 363] blk.8.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 116/ 363] blk.8.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 117/ 363] blk.8.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 118/ 363] blk.8.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 119/ 363] blk.9.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 120/ 363] blk.9.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 121/ 363] blk.9.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 122/ 363] blk.9.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 123/ 363] blk.9.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 124/ 363] blk.9.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 125/ 363] blk.9.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 126/ 363] blk.9.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 127/ 363] blk.9.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 128/ 363] blk.14.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 129/ 363] blk.14.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 130/ 363] blk.14.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 131/ 363] blk.14.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 132/ 363] blk.14.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 133/ 363] blk.14.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 134/ 363] blk.14.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 135/ 363] blk.14.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 136/ 363] blk.14.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. 
size = 12.50 MiB -> 3.52 MiB\n", "[ 137/ 363] blk.15.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 138/ 363] blk.15.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 139/ 363] blk.15.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 140/ 363] blk.15.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 141/ 363] blk.15.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 142/ 363] blk.15.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 143/ 363] blk.15.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 144/ 363] blk.15.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 145/ 363] blk.15.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 146/ 363] blk.16.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 147/ 363] blk.16.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 148/ 363] blk.16.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 149/ 363] blk.16.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 150/ 363] blk.16.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 151/ 363] blk.16.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 152/ 363] blk.16.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 153/ 363] blk.16.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 154/ 363] blk.16.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 155/ 363] blk.17.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 156/ 363] blk.17.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 157/ 363] blk.17.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 158/ 363] blk.17.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 159/ 363] blk.17.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 160/ 363] blk.17.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 161/ 363] blk.17.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 162/ 363] blk.17.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 163/ 363] blk.17.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 164/ 363] blk.18.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 165/ 363] blk.18.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 166/ 363] blk.18.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 167/ 363] blk.18.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 168/ 363] blk.18.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 169/ 363] blk.18.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 170/ 363] blk.18.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 171/ 363] blk.18.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 172/ 363] blk.18.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 173/ 363] blk.19.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 174/ 363] blk.19.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 175/ 363] blk.19.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 176/ 363] blk.19.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 177/ 363] blk.19.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 178/ 363] blk.19.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 179/ 363] blk.19.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 180/ 363] blk.19.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 181/ 363] blk.19.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 182/ 363] blk.20.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 183/ 363] blk.20.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 184/ 363] blk.20.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 185/ 363] blk.20.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 186/ 363] blk.20.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 187/ 363] blk.20.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 188/ 363] blk.20.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 189/ 363] blk.20.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 190/ 363] blk.20.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 191/ 363] blk.21.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 192/ 363] blk.21.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 193/ 363] blk.21.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 194/ 363] blk.21.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 195/ 363] blk.21.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 196/ 363] blk.21.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 197/ 363] blk.21.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 198/ 363] blk.21.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 199/ 363] blk.21.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 200/ 363] blk.22.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 201/ 363] blk.22.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 202/ 363] blk.22.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 203/ 363] blk.22.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 204/ 363] blk.22.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 205/ 363] blk.22.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 206/ 363] blk.22.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 207/ 363] blk.22.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 208/ 363] blk.22.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 209/ 363] blk.23.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 210/ 363] blk.23.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 211/ 363] blk.23.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 212/ 363] blk.23.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 213/ 363] blk.23.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 214/ 363] blk.23.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 215/ 363] blk.23.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 216/ 363] blk.23.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 217/ 363] blk.23.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 218/ 363] blk.24.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 219/ 363] blk.24.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 220/ 363] blk.24.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 221/ 363] blk.24.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 222/ 363] blk.24.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 223/ 363] blk.24.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 224/ 363] blk.24.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 225/ 363] blk.24.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 226/ 363] blk.24.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 227/ 363] blk.25.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 228/ 363] blk.25.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 229/ 363] blk.25.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 230/ 363] blk.25.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 231/ 363] blk.25.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 232/ 363] blk.25.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 233/ 363] blk.25.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 234/ 363] blk.25.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 235/ 363] blk.25.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 236/ 363] blk.26.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 237/ 363] blk.26.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 238/ 363] blk.26.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 239/ 363] blk.26.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 240/ 363] blk.26.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 241/ 363] blk.26.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 242/ 363] blk.26.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 243/ 363] blk.26.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 244/ 363] blk.26.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 245/ 363] blk.27.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 246/ 363] blk.27.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 247/ 363] blk.27.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 248/ 363] blk.27.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 249/ 363] blk.27.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 250/ 363] blk.27.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 251/ 363] blk.27.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 252/ 363] blk.27.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 253/ 363] blk.27.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 254/ 363] blk.28.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 255/ 363] blk.28.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 256/ 363] blk.28.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 257/ 363] blk.28.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 258/ 363] blk.28.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 259/ 363] blk.28.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 260/ 363] blk.28.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 261/ 363] blk.28.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 262/ 363] blk.28.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 263/ 363] blk.29.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 264/ 363] blk.29.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 265/ 363] blk.29.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 266/ 363] blk.29.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 267/ 363] blk.29.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 268/ 363] blk.29.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 269/ 363] blk.29.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 270/ 363] blk.29.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 271/ 363] blk.29.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 272/ 363] blk.30.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 273/ 363] blk.30.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 274/ 363] blk.30.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 275/ 363] blk.30.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 276/ 363] blk.30.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 277/ 363] blk.30.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 278/ 363] blk.30.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 279/ 363] blk.30.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 280/ 363] blk.30.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 281/ 363] blk.31.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 282/ 363] blk.31.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 283/ 363] blk.31.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 284/ 363] blk.31.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 285/ 363] blk.31.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 286/ 363] blk.31.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 287/ 363] blk.31.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 288/ 363] blk.31.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 289/ 363] blk.31.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 290/ 363] blk.32.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 291/ 363] blk.32.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 292/ 363] blk.32.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 293/ 363] blk.32.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 294/ 363] blk.32.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 295/ 363] blk.32.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 296/ 363] blk.32.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 297/ 363] blk.32.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 298/ 363] blk.32.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 299/ 363] blk.33.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 300/ 363] blk.33.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 301/ 363] blk.33.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 302/ 363] blk.33.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 303/ 363] blk.33.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 304/ 363] blk.33.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 305/ 363] blk.33.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 306/ 363] blk.33.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 307/ 363] blk.33.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 308/ 363] blk.34.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 309/ 363] blk.34.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 310/ 363] blk.34.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 311/ 363] blk.34.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 312/ 363] blk.34.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 313/ 363] blk.34.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 314/ 363] blk.34.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 315/ 363] blk.34.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 316/ 363] blk.34.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 317/ 363] blk.35.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 318/ 363] blk.35.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 319/ 363] blk.35.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 320/ 363] blk.35.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 321/ 363] blk.35.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 322/ 363] blk.35.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 323/ 363] output.weight - [ 5120, 32064, 1, 1], type = bf16, converting to q6_K .. size = 313.12 MiB -> 128.43 MiB\n", "[ 324/ 363] blk.35.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 325/ 363] blk.35.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. 
size = 175.00 MiB -> 71.78 MiB\n", "[ 326/ 363] blk.35.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 327/ 363] blk.36.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 328/ 363] blk.36.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 329/ 363] blk.36.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 330/ 363] blk.36.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 331/ 363] blk.36.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 332/ 363] blk.36.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 333/ 363] blk.36.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 334/ 363] blk.36.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 335/ 363] blk.36.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 336/ 363] blk.37.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 337/ 363] blk.37.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 338/ 363] blk.37.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 339/ 363] blk.37.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 340/ 363] blk.37.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 341/ 363] blk.37.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 342/ 363] blk.37.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. 
size = 50.00 MiB -> 14.06 MiB\n", "[ 343/ 363] blk.37.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 344/ 363] blk.37.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 345/ 363] blk.38.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 346/ 363] blk.38.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 347/ 363] blk.38.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 348/ 363] blk.38.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 349/ 363] blk.38.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 350/ 363] blk.38.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 351/ 363] blk.38.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 352/ 363] blk.38.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 353/ 363] blk.38.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 354/ 363] blk.39.attn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 355/ 363] blk.39.ffn_down.weight - [17920, 5120, 1, 1], type = bf16, converting to q6_K .. size = 175.00 MiB -> 71.78 MiB\n", "[ 356/ 363] blk.39.ffn_gate.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. size = 175.00 MiB -> 49.22 MiB\n", "[ 357/ 363] blk.39.ffn_up.weight - [ 5120, 17920, 1, 1], type = bf16, converting to q4_K .. 
size = 175.00 MiB -> 49.22 MiB\n", "[ 358/ 363] blk.39.ffn_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "[ 359/ 363] blk.39.attn_k.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q4_K .. size = 12.50 MiB -> 3.52 MiB\n", "[ 360/ 363] blk.39.attn_output.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 361/ 363] blk.39.attn_q.weight - [ 5120, 5120, 1, 1], type = bf16, converting to q4_K .. size = 50.00 MiB -> 14.06 MiB\n", "[ 362/ 363] blk.39.attn_v.weight - [ 5120, 1280, 1, 1], type = bf16, converting to q6_K .. size = 12.50 MiB -> 5.13 MiB\n", "[ 363/ 363] output_norm.weight - [ 5120, 1, 1, 1], type = f32, size = 0.020 MB\n", "llama_model_quantize_internal: model size = 26627.83 MB\n", "llama_model_quantize_internal: quant size = 8013.98 MB\n", "\n", "main: quantize time = 403335.17 ms\n", "main: total time = 403335.17 ms\n", "Unsloth: Conversion completed! Output location: ./arbinMichael/phimed/unsloth.Q4_K_M.gguf\n", "Unsloth: Saved Ollama Modelfile to arbinMichael/phimed/Modelfile\n", "Unsloth: Uploading GGUF to Huggingface Hub...\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model\u001b[38;5;241m.\u001b[39mpush_to_hub_gguf(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marbinMichael/phimed\u001b[39m\u001b[38;5;124m\"\u001b[39m, tokenizer, quantization_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mq4_k_m\u001b[39m\u001b[38;5;124m\"\u001b[39m, token \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_********REDACTED********\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File 
\u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1830\u001b[0m, in \u001b[0;36munsloth_push_to_hub_gguf\u001b[0;34m(self, repo_id, tokenizer, quantization_method, first_conversion, use_temp_dir, commit_message, private, token, max_shard_size, create_pr, safe_serialization, revision, commit_description, tags, temporary_location, maximum_memory_usage)\u001b[0m\n\u001b[1;32m 1828\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_location \u001b[38;5;129;01min\u001b[39;00m all_file_locations:\n\u001b[1;32m 1829\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth: Uploading GGUF to Huggingface Hub...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1830\u001b[0m username \u001b[38;5;241m=\u001b[39m upload_to_huggingface(\n\u001b[1;32m 1831\u001b[0m \u001b[38;5;28mself\u001b[39m, repo_id, token,\n\u001b[1;32m 1832\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGGUF converted\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgguf\u001b[39m\u001b[38;5;124m\"\u001b[39m, file_location, old_username, private,\n\u001b[1;32m 1833\u001b[0m )\n\u001b[1;32m 1834\u001b[0m link \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00musername\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnew_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \\\n\u001b[1;32m 1835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m username \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m new_save_directory \u001b[38;5;28;01melse\u001b[39;00m \\\n\u001b[1;32m 1836\u001b[0m new_save_directory\u001b[38;5;241m.\u001b[39mlstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 1838\u001b[0m 
\u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSaved GGUF to https://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlink\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/unsloth/save.py:1416\u001b[0m, in \u001b[0;36mupload_to_huggingface\u001b[0;34m(model, save_directory, token, method, extra, file_location, old_username, private, create_config)\u001b[0m\n\u001b[1;32m 1413\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 1414\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m-> 1416\u001b[0m hf_api\u001b[38;5;241m.\u001b[39mupload_file(\n\u001b[1;32m 1417\u001b[0m path_or_fileobj \u001b[38;5;241m=\u001b[39m file_location,\n\u001b[1;32m 1418\u001b[0m path_in_repo \u001b[38;5;241m=\u001b[39m uploaded_location,\n\u001b[1;32m 1419\u001b[0m repo_id \u001b[38;5;241m=\u001b[39m save_directory,\n\u001b[1;32m 1420\u001b[0m repo_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1421\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(Trained with Unsloth)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1422\u001b[0m )\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;66;03m# We also upload a config.json file\u001b[39;00m\n\u001b[1;32m 1425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m create_config:\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, 
has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/hf_api.py:1398\u001b[0m, in \u001b[0;36mfuture_compatible.._inner\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_as_future(fn, \u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1397\u001b[0m \u001b[38;5;66;03m# Otherwise, call the function normally\u001b[39;00m\n\u001b[0;32m-> 1398\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/hf_api.py:4502\u001b[0m, in \u001b[0;36mHfApi.upload_file\u001b[0;34m(self, path_or_fileobj, path_in_repo, repo_id, token, repo_type, revision, commit_message, commit_description, create_pr, parent_commit, run_as_future)\u001b[0m\n\u001b[1;32m 4497\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid repo type, must be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mREPO_TYPES\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4499\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 4500\u001b[0m commit_message \u001b[38;5;28;01mif\u001b[39;00m commit_message \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUpload \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_in_repo\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with huggingface_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4501\u001b[0m )\n\u001b[0;32m-> 4502\u001b[0m operation \u001b[38;5;241m=\u001b[39m CommitOperationAdd(\n\u001b[1;32m 4503\u001b[0m path_or_fileobj\u001b[38;5;241m=\u001b[39mpath_or_fileobj,\n\u001b[1;32m 4504\u001b[0m path_in_repo\u001b[38;5;241m=\u001b[39mpath_in_repo,\n\u001b[1;32m 4505\u001b[0m )\n\u001b[1;32m 4507\u001b[0m commit_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcreate_commit(\n\u001b[1;32m 4508\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[1;32m 4509\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4516\u001b[0m parent_commit\u001b[38;5;241m=\u001b[39mparent_commit,\n\u001b[1;32m 4517\u001b[0m )\n\u001b[1;32m 4519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m commit_info\u001b[38;5;241m.\u001b[39mpr_url \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m:5\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, path_in_repo, path_or_fileobj)\u001b[0m\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/_commit_api.py:189\u001b[0m, in \u001b[0;36mCommitOperationAdd.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[38;5;66;03m# Compute \"upload_info\" attribute\u001b[39;00m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupload_info \u001b[38;5;241m=\u001b[39m 
UploadInfo\u001b[38;5;241m.\u001b[39mfrom_path(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj)\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj, \u001b[38;5;28mbytes\u001b[39m):\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupload_info \u001b[38;5;241m=\u001b[39m UploadInfo\u001b[38;5;241m.\u001b[39mfrom_bytes(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath_or_fileobj)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/lfs.py:84\u001b[0m, in \u001b[0;36mUploadInfo.from_path\u001b[0;34m(cls, path)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m io\u001b[38;5;241m.\u001b[39mopen(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 83\u001b[0m sample \u001b[38;5;241m=\u001b[39m file\u001b[38;5;241m.\u001b[39mpeek(\u001b[38;5;241m512\u001b[39m)[:\u001b[38;5;241m512\u001b[39m]\n\u001b[0;32m---> 84\u001b[0m sha \u001b[38;5;241m=\u001b[39m sha_fileobj(file)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(size\u001b[38;5;241m=\u001b[39msize, sha256\u001b[38;5;241m=\u001b[39msha, sample\u001b[38;5;241m=\u001b[39msample)\n", "File \u001b[0;32m~/.conda/envs/ft5/lib/python3.11/site-packages/huggingface_hub/utils/sha.py:25\u001b[0m, in \u001b[0;36msha_fileobj\u001b[0;34m(fileobj, chunk_size)\u001b[0m\n\u001b[1;32m 23\u001b[0m sha \u001b[38;5;241m=\u001b[39m sha256()\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m chunk \u001b[38;5;241m=\u001b[39m fileobj\u001b[38;5;241m.\u001b[39mread(chunk_size)\n\u001b[1;32m 26\u001b[0m sha\u001b[38;5;241m.\u001b[39mupdate(chunk)\n\u001b[1;32m 27\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m chunk:\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "model.push_to_hub_gguf(\"arbinMichael/phimed\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "ca03f0a5-5d01-4a92-ac3d-6b53f4d8f6e2", "metadata": {}, "outputs": [], "source": [ "model.save_pretrained(\"lora_model_new\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "2e42b81e-e794-44f2-9002-358d8f8837f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('lora_model_new/tokenizer_config.json',\n", " 'lora_model_new/special_tokens_map.json',\n", " 'lora_model_new/tokenizer.model',\n", " 'lora_model_new/added_tokens.json',\n", " 'lora_model_new/tokenizer.json')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.save_pretrained(\"lora_model_new\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "70239d84-fd1e-4385-a9f6-39bbbcee8078", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "21\n" ] } ], "source": [ "um = round(torch.cuda.max_memory_reserved()/1024/1024/1024,)\n", "print(um)" ] }, { "cell_type": "code", "execution_count": 9, "id": "33d4d882-2eec-4c12-957b-2f96150a2cc8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak reserved memory = 9.447 GB.\n" ] } ], "source": [ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "print(f\"Peak reserved memory = {used_memory} GB.\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8c2d42e5-df94-49db-acfc-71989060825a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }