{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "w7JMaiAsmqp_" }, "outputs": [], "source": [ "%%capture\n", "!pip install unsloth\n", "# Also get the latest nightly Unsloth!\n", "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git" ] }, { "cell_type": "markdown", "metadata": { "id": "qfmW3jwuqqcZ" }, "source": [ "## Getting Model" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322, "referenced_widgets": [ "f76291562fb4491a95b2445dcbf68fe5", "ab21ec1286e248c9b3917478af06ca6f", "5a3fb93595054105b75b1e6e7625f198", "eefaa34fa5a241329776cc32e2a41ea6", "e20ef5b49aaa4e8aa2556b130e4785a3", "52f452bd83ec4a68bec68bdc230f38ea", "acfc49afc4094c42a02c82821599e2a2", "7a5d4bec20ed423288c98f3639f0ac15", "b2834e0c53a041fd83177acd8a49dada", "d9af5a8a7b014123b375b94671dcd98d", "e5f03fa994064640bef91f19b949b103", "3902c8e9ed3a4c7caf2028fa3f7fea78", "7485d9bcce4944299fb8273d9f409aab", "dae36b0b452448e59c0a3f423cb1bd41", "a1f67b9a564d4247b27b4b9c92b4501b", "73c3d624fca74964b55237a3d2f2a933", "823988be4fd64f0aacc3237d4d5310ee", "40258eb34329418e86956b27fceca736", "ea84e172c6894ba4a1ded6d94c0ce916", "935edef9eb084c6594e6015711f1b0f8", "0499f0b19a3c49e8aeb3b800460198c4", "42087d8365774a978022273673bc66e1", "cf91ce82ec144def8900988d07be75bd", "37ffad62ae0842318bf571f94368261b", "0330d56f32c04639a9231f0a6b56c7c8", "a4870126718e41ed8431862de12e01b0", "f626588ccb1f4ce98dcac93c07b34247", "2632f3a9f52140ddb6ea8a36b5097609", "87835426f0424977bc29fdb801a2fd72", "cb33e8204d2143d097d45b0c818063be", "0ea67e0e36b9449f9f0aa3a8f3fd5884", "e4f56aa9412d4066b501f17584f5c20a", "c539fec729a04a5b902556e24aa90c28", "7a3683867bd441f8a01d85e09db03a3f", "1040e1a8b834432894904b41ba779eab", "ab4254a78b684b2fa3e3db0c49eb61dc", "dc02f9ab70b649158e9fb581c681f905", "f13f507e4118404a80ac3c946a8e605e", "6bf30f78b2c543c89a2ec9bdcde8bb80", 
"2611620373f7433da78d09bbb4d4bc74", "7bcac9a047c840ce905efc44183c79b0", "31a0bef9e16c49ad8b4b6f8a9e96ff3b", "2dec7c7037194ce396ef8316e0d9a427", "b0323c0f6d0148d7b435327119c6eac1", "40c629ffc272467e847f875dc5e4c30a", "f0e51570b9144dee81e1d8b34760d7eb", "53444ce6e77244579f9a445e45ac72d5", "893b0854c6ad4d83953daf815f33762e", "1afc51a0ad864224a28e080ac3d7b8b5", "c21b5490a9054283b198bbb1f010aa80", "9df5f2edcc084a62b90d509466eac196", "0da23bf302d84e5984559d25ed36de80", "e09cec353915478e872256163029cd23", "11097c660c454a56a3d75991bce307e8", "cc923a4a41fd4a678485486e0363c934" ] }, "id": "G1wZb3v-m2mm", "outputId": "a4aead8c-2ade-45eb-f3f8-4bddc349263b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", "==((====))== Unsloth 2025.1.7: Fast Llama patching. Transformers: 4.47.1.\n", " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.\n", "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0\n", "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. 
FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f76291562fb4491a95b2445dcbf68fe5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/2.24G [00:00\n", " \n", " \n", " [60/60 20:45, Epoch 0/1]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
11.607800
21.688200
31.635200
41.606400
51.586400
61.475200
71.558900
81.434800
91.449500
101.496400
111.462800
121.461600
131.497200
141.415300
151.393200
161.406100
171.339500
181.465900
191.384600
201.394900
211.386800
221.335600
231.383300
241.384300
251.363300
261.276200
271.305700
281.317400
291.342800
301.319700
311.316800
321.335000
331.352500
341.324700
351.321800
361.283700
371.276500
381.323100
391.374100
401.371300
411.301100
421.300200
431.340100
441.355800
451.319900
461.302000
471.371400
481.313300
491.297900
501.335400
511.241600
521.276900
531.268200
541.216300
551.357800
561.266900
571.257100
581.310000
591.326600
601.305500

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer_stats = trainer.train()" ] }, { "cell_type": "markdown", "metadata": { "id": "TiiAp_jlsmJN" }, "source": [ "## Testing the fine-tuned model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7HLLFh9yrh5t", "outputId": "7c72867b-f73a-44ed-ecbf-1ad6c34bb6aa" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|begin_of_text|>Once upon a time, there was a kingdom, far far\n", "away, ruled by a king. The king \n", "loved to eat delicious food, so he would send his servants out to find the best food for him to eat.\n", "\n", "\n", "One day, the king's servant found a delicious cake in the forest. He brought it back to the king, who \n", "was so happy to eat it. He ate the cake all by himself, and the king was very happy.\n", "\n", "The king loved \n", "the cake so much that he decided to send out his servants to find more delicious cakes. 
He wanted to \n", "make sure that everyone in the kingdom was happy and had a delicious cake to eat.<|end_of_text|>" ] } ], "source": [ "from transformers import TextIteratorStreamer\n", "from threading import Thread\n", "import textwrap\n", "\n", "# Prepare the model for inference\n", "FastLanguageModel.for_inference(model)\n", "\n", "text_streamer = TextIteratorStreamer(tokenizer)\n", "max_print_width = 100\n", "\n", "inputs = tokenizer(\n", "    [\"Once upon a time, there was a kingdom, far far away,\"],\n", "    return_tensors=\"pt\"\n", ").to(\"cuda\")\n", "\n", "generation_kwargs = dict(\n", "    inputs,\n", "    streamer=text_streamer,\n", "    max_new_tokens=512,\n", "    use_cache=True,\n", ")\n", "\n", "# Start the generation in a separate thread so this cell can consume the\n", "# streamer while model.generate() is still producing tokens.\n", "thread = Thread(target=model.generate, kwargs=generation_kwargs)\n", "thread.start()\n", "\n", "# Stream the generated text, soft-wrapping at max_print_width columns.\n", "# `length` is the number of characters already printed on the current line.\n", "length = 0\n", "for j, new_text in enumerate(text_streamer):\n", "    if j == 0:\n", "        # In the recorded run the first chunk is the echoed prompt, which may be\n", "        # wider than one line. drop_whitespace=False keeps its trailing space so\n", "        # the next chunk does not fuse with the prompt's last word.\n", "        wrapped_text = textwrap.wrap(new_text, width=max_print_width, drop_whitespace=False)\n", "        # textwrap.wrap(\"\") returns [], so guard the [-1] lookup.\n", "        length = len(wrapped_text[-1]) if wrapped_text else 0\n", "        # No trailing newline: later chunks continue on this same line, which is\n", "        # exactly what `length` assumes.\n", "        print(\"\\n\".join(wrapped_text), end=\"\")\n", "    else:\n", "        # Break before the chunk that would overflow the current line.\n", "        if length > 0 and length + len(new_text) > max_print_width:\n", "            print()\n", "            length = 0\n", "        print(new_text, end=\"\")\n", "        # A newline emitted by the model starts a fresh line; count only the\n", "        # characters printed after the last one.\n", "        last_newline = new_text.rfind(\"\\n\")\n", "        if last_newline == -1:\n", "            length += len(new_text)\n", "        else:\n", "            length = len(new_text) - last_newline - 1\n", "print()\n", "\n", "# The streamer is exhausted once generation has ended; join so the worker\n", "# thread is not left dangling when the cell returns.\n", "thread.join()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "-eNZ2ylMxxp7" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yYq5BXl21hmK", "outputId": "428ebc13-3753-450c-a2ce-4f5020abcd36" }, "outputs": [ { "data": { "text/plain": [ "('lora_model/tokenizer_config.json',\n", " 'lora_model/special_tokens_map.json',\n", " 'lora_model/tokenizer.json')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.save_pretrained(\"lora_model\") # Local saving\n", "tokenizer.save_pretrained(\"lora_model\")\n", "# model.push_to_hub(\"your_name/lora_model\", 
token = \"...\") # Online saving\n", "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "9dd544ba91794c52a1da18ea83a7a064", "2588d934881a4066ae76b840210ce78e", "40503ef20807495bb75008025a833422", "f0cd12c7b8a943c08f343538db0c29ed", "86d067e24e264a0bb61e124b5acac903", "e342ce153d7c464a976daac75b411ee5", "332d0fdabe7144518bf3f0e25f1ab267", "7774618b4b004b30a16dc8ea9ab8665e", "71476f32a9ab4c018cad0756a521f3f8", "073c3f68e21b45ee8c68c9b27cfad3f2", "0b29d4ae783c47f5b3d9b02eae33c3c4", "ed49ab26c36646f8b6772ec84d261a8e", "6bba6df81a4a4737949eda4ecbbed788", "7427290f19d24ad5a728b0c595272afe", "ffdeddbb32214d18a3eee39496427510", "cdd0bf225f7644ed87cad3e5220849a6", "8d213cf2323f44ba8b850986abd897e8", "1fb786a0c41a4365ab016b229eea7dd6", "17eb0c53c2284e498d0d0e1205311209", "beac9d74a86e48939eedea6c3f679f74", "0808e729718d443a8daaf35788c3b867", "e5f44ef088eb492383c1271ecb20988d" ] }, "id": "gIxmo-sa1hh1", "outputId": "92419755-85ec-495f-b89b-9d1840f3bd6d" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.\n", "We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.\n", "To force `safe_serialization`, set it to `None` instead.\n", "Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded\n", "model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.\n", "Unsloth: Will remove a cached repo with size 2.2G\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 5.9 out of 12.67 RAM for saving.\n", "Unsloth: Saving model... 
This might take 5 minutes ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 28/28 [00:01<00:00, 18.70it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... Done.\n", "Unsloth: Saving model/pytorch_model-00001-of-00002.bin...\n", "Unsloth: Saving model/pytorch_model-00002-of-00002.bin...\n", "Done.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: Converting llama model. Can use fast conversion = False.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q8_0'] might take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: Installing llama.cpp. This might take 3 minutes...\n", "Unsloth: CMAKE detected. 
Finalizing some steps for installation.\n", "Unsloth: [1] Converting model at model into q8_0 GGUF format.\n", "The output location will be /content/model/unsloth.Q8_0.gguf\n", "This might take 3 minutes...\n", "INFO:hf-to-gguf:Loading model: model\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:rope_freqs.weight, torch.float32 --> F32, shape = {64}\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00002.bin'\n", "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {3072, 128256}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = 
{3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = 
{3072, 1024}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> 
Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, 
shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", 
"INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = 
{3072, 1024}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", 
"INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = 
{3072, 1024}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00002-of-00002.bin'\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, 
torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", 
"INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 
--> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 131072\n", "INFO:hf-to-gguf:gguf: embedding length = 3072\n", "INFO:hf-to-gguf:gguf: feed forward length = 8192\n", "INFO:hf-to-gguf:gguf: head count = 24\n", "INFO:hf-to-gguf:gguf: key-value head count = 8\n", "INFO:hf-to-gguf:gguf: rope theta = 500000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 7\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n", "2025-01-27 09:28:53.169984: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2025-01-27 09:28:53.198162: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to 
register factory for plugin cuDNN when one has already been registered\n", "2025-01-27 09:28:53.205662: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2025-01-27 09:28:55.603739: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "INFO:gguf.vocab:Adding 280147 merge(s).\n", "INFO:gguf.vocab:Setting special token type bos to 128000\n", "INFO:gguf.vocab:Setting special token type eos to 128001\n", "INFO:gguf.vocab:Setting special token type pad to 128004\n", "INFO:gguf.vocab:Setting add_bos_token to True\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:/content/model/unsloth.Q8_0.gguf: n_tensors = 255, total_size = 3.4G\n", "Writing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.41G/3.41G [01:09<00:00, 48.9Mbyte/s]\n", "INFO:hf-to-gguf:Model successfully exported to /content/model/unsloth.Q8_0.gguf\n", "Unsloth: Conversion completed! Output location: /content/model/unsloth.Q8_0.gguf\n", "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", "Unsloth: Will use up to 5.95 out of 12.67 RAM for saving.\n", "Unsloth: Saving model... This might take 5 minutes ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 28/28 [00:00<00:00, 31.39it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: Saving tokenizer... 
Done.\n", "Unsloth: Saving RushabhShah122000/model/pytorch_model-00001-of-00002.bin...\n", "Unsloth: Saving RushabhShah122000/model/pytorch_model-00002-of-00002.bin...\n", "Done.\n", "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n", "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n", "\\ / [2] Converting GGUF 16bits to ['q8_0'] might take 10 minutes each.\n", " \"-____-\" In total, you will have to wait at least 16 minutes.\n", "\n", "Unsloth: Installing llama.cpp. This might take 3 minutes...\n", "Unsloth: [1] Converting model at RushabhShah122000/model into q8_0 GGUF format.\n", "The output location will be /content/RushabhShah122000/model/unsloth.Q8_0.gguf\n", "This might take 3 minutes...\n", "INFO:hf-to-gguf:Loading model: model\n", "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", "INFO:hf-to-gguf:Exporting model...\n", "INFO:hf-to-gguf:rope_freqs.weight, torch.float32 --> F32, shape = {64}\n", "INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'\n", "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00002.bin'\n", "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {3072, 128256}\n", "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", 
"INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 
3072}\n", "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = 
{3072}\n", "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape 
= {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, 
shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", 
"INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = 
{3072, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_up.weight, 
torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00002-of-00002.bin'\n", "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", 
"INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> 
Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.attn_q.weight, 
torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {3072, 1024}\n", "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {3072, 3072}\n", "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {3072, 8192}\n", "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {8192, 3072}\n", "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {3072}\n", "INFO:hf-to-gguf:Set meta model\n", "INFO:hf-to-gguf:Set model parameters\n", "INFO:hf-to-gguf:gguf: context length = 131072\n", "INFO:hf-to-gguf:gguf: embedding length = 3072\n", "INFO:hf-to-gguf:gguf: feed forward length = 8192\n", "INFO:hf-to-gguf:gguf: head count = 24\n", "INFO:hf-to-gguf:gguf: key-value head count = 8\n", "INFO:hf-to-gguf:gguf: rope theta = 
500000.0\n", "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n", "INFO:hf-to-gguf:gguf: file type = 7\n", "INFO:hf-to-gguf:Set model tokenizer\n", "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n", "2025-01-27 09:31:10.231812: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2025-01-27 09:31:10.258410: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2025-01-27 09:31:10.266774: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2025-01-27 09:31:12.483815: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "INFO:gguf.vocab:Adding 280147 merge(s).\n", "INFO:gguf.vocab:Setting special token type bos to 128000\n", "INFO:gguf.vocab:Setting special token type eos to 128001\n", "INFO:gguf.vocab:Setting special token type pad to 128004\n", "INFO:gguf.vocab:Setting add_bos_token to True\n", "INFO:hf-to-gguf:Set model quantization version\n", "INFO:gguf.gguf_writer:Writing the following files:\n", "INFO:gguf.gguf_writer:/content/RushabhShah122000/model/unsloth.Q8_0.gguf: n_tensors = 255, total_size = 3.4G\n", "Writing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.41G/3.41G [01:13<00:00, 46.6Mbyte/s]\n", "INFO:hf-to-gguf:Model successfully exported to /content/RushabhShah122000/model/unsloth.Q8_0.gguf\n", "Unsloth: Conversion completed! 
Output location: /content/RushabhShah122000/model/unsloth.Q8_0.gguf\n", "Unsloth: Uploading GGUF to Huggingface Hub...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9dd544ba91794c52a1da18ea83a7a064", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00