#!/bin/bash
# 🔄 Convert fine-tuned model to GGUF format for Ollama
# This script converts your custom fine-tuned model to GGUF format

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🔄 Converting Model to GGUF Format${NC}"
echo "====================================="

# Configuration
MODEL_DIR="./fine_tuned_model"
OUTPUT_FILE="my_custom_model.gguf"
LLAMA_CPP_DIR="./llama.cpp"

# Check if fine-tuned model exists
if [ ! -d "$MODEL_DIR" ]; then
    echo -e "${RED}❌ Fine-tuned model not found at: $MODEL_DIR${NC}"
    echo "Run fine-tuning first: python create_custom_model.py (option 2)"
    exit 1
fi

echo -e "${GREEN}✅ Found fine-tuned model at: $MODEL_DIR${NC}"

# Check if llama.cpp exists; if not, clone it
if [ ! -d "$LLAMA_CPP_DIR" ]; then
    echo -e "${YELLOW}📥 Cloning llama.cpp...${NC}"
    git clone https://github.com/ggerganov/llama.cpp.git

    echo -e "${YELLOW}🔨 Building llama.cpp...${NC}"
    cd llama.cpp

    # Build with CUDA support if available
    if command -v nvcc &> /dev/null; then
        echo -e "${GREEN}🚀 NVIDIA CUDA detected, building with GPU support${NC}"
        make LLAMA_CUBLAS=1 -j$(nproc)
    else
        echo -e "${YELLOW}⚠️ No CUDA detected, building CPU-only version${NC}"
        make -j$(nproc)
    fi

    cd ..
else
    echo -e "${GREEN}✅ llama.cpp already exists${NC}"
fi

# Check required Python dependencies
echo -e "${BLUE}📦 Checking Python dependencies...${NC}"
python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || {
    echo -e "${YELLOW}⚠️ Installing missing dependencies...${NC}"
    pip install torch transformers sentencepiece protobuf
}

# Convert model to GGUF
echo -e "${BLUE}🔄 Converting to GGUF format...${NC}"
echo "This may take several minutes..."

# Method 1: Direct conversion with convert.py (present in older llama.cpp checkouts)
if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then
    echo -e "${GREEN}Using convert.py${NC}"
    python3 "$LLAMA_CPP_DIR/convert.py" \
        "$MODEL_DIR" \
        --outtype f16 \
        --outfile "$OUTPUT_FILE"
else
    # Method 2 (fallback): re-save the model in HF format, then convert with
    # convert_hf_to_gguf.py, which newer llama.cpp versions ship instead of convert.py
    echo -e "${YELLOW}Using alternative conversion method${NC}"
    python3 -c "
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

print('Loading model...')
model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')

print('Saving in HF format...')
model.save_pretrained('./temp_hf_model', safe_serialization=True)
tokenizer.save_pretrained('./temp_hf_model')
print('Conversion to HF format complete')
"

    # Then convert HF to GGUF
    if [ -d "./temp_hf_model" ]; then
        python3 "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" \
            "./temp_hf_model" \
            --outtype f16 \
            --outfile "$OUTPUT_FILE"
        rm -rf ./temp_hf_model
    fi
fi

# Verify conversion
if [ -f "$OUTPUT_FILE" ]; then
    FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
    echo
    echo -e "${GREEN}🎉 Conversion successful!${NC}"
    echo -e "${BLUE}📄 Output file: $OUTPUT_FILE${NC}"
    echo -e "${BLUE}📊 File size: $FILE_SIZE${NC}"

    # Optional: quantize to smaller sizes
    echo
    echo -e "${YELLOW}💡 Optional: Create quantized versions?${NC}"
    read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🔄 Creating Q4_K_M quantized version...${NC}"
        "$LLAMA_CPP_DIR/quantize" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M

        if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
            QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1)
            echo -e "${GREEN}✅ Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}"
        fi
    fi

    # Test the converted model
    echo
    echo -e "${YELLOW}🧪 Test the converted model?${NC}"
    read -p "Run a quick test? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🧪 Testing model...${NC}"
        echo "Prompt: 'Hello, how are you?'"
        echo "Response:"
        "$LLAMA_CPP_DIR/main" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7
    fi
else
    echo -e "${RED}❌ Conversion failed!${NC}"
    echo "Check the error messages above."
    exit 1
fi

# Instructions for next steps
echo
echo -e "${GREEN}🎯 Next Steps:${NC}"
echo "1. Create Ollama Modelfile:"
echo "   python create_custom_model.py   # option 4"
echo
echo "2. Import to Ollama:"
echo "   ollama create my-custom-model -f Modelfile"
echo
echo "3. Test in Ollama:"
echo "   ollama run my-custom-model \"Hello!\""
echo
echo "4. Push to Ollama Library:"
echo "   ollama push my-custom-model"
echo
echo -e "${BLUE}📚 Files created:${NC}"
echo "  • $OUTPUT_FILE (F16 version)"
if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
    echo "  • ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)"
fi
echo
echo -e "${GREEN}🎉 GGUF conversion completed successfully!${NC}"
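
# Reference only (not executed): a minimal sketch of the Modelfile used in steps 1 and 2
# of the next-steps list above. The FROM path matches $OUTPUT_FILE; the system prompt and
# parameter values are illustrative assumptions, so adjust them to your fine-tuned model
# before running `ollama create`.
#
#   FROM ./my_custom_model.gguf
#   PARAMETER temperature 0.7
#   PARAMETER num_ctx 4096
#   SYSTEM "You are a helpful assistant fine-tuned on custom data."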