#!/bin/bash
# 🔄 Convert fine-tuned model to GGUF format for Ollama
# This script converts your custom fine-tuned model to GGUF format
set -euo pipefail  # exit on errors, unset variables, and pipeline failures
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
echo -e "${BLUE}πŸ”„ Converting Model to GGUF Format${NC}"
echo "====================================="
# Configuration
MODEL_DIR="./fine_tuned_model"
OUTPUT_FILE="my_custom_model.gguf"
LLAMA_CPP_DIR="./llama.cpp"
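# Adjust the paths above to match your setup. As a rule of thumb, an F16 GGUF
# stores about 2 bytes per parameter, i.e. roughly 2 GB per billion parameters.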
# Check if fine-tuned model exists
if [ ! -d "$MODEL_DIR" ]; then
echo -e "${RED}❌ Fine-tuned model not found at: $MODEL_DIR${NC}"
echo "Run fine-tuning first: python create_custom_model.py (option 2)"
exit 1
fi
echo -e "${GREEN}βœ… Found fine-tuned model at: $MODEL_DIR${NC}"
# Check if llama.cpp exists, if not clone it
if [ ! -d "$LLAMA_CPP_DIR" ]; then
echo -e "${YELLOW}πŸ“₯ Cloning llama.cpp...${NC}"
git clone https://github.com/ggerganov/llama.cpp.git
echo -e "${YELLOW}πŸ”¨ Building llama.cpp...${NC}"
cd llama.cpp
# Build with CUDA support if available
if command -v nvcc &> /dev/null; then
echo -e "${GREEN}πŸš€ NVIDIA CUDA detected, building with GPU support${NC}"
make LLAMA_CUBLAS=1 -j$(nproc)
else
echo -e "${YELLOW}⚠️ No CUDA detected, building CPU-only version${NC}"
make -j$(nproc)
fi
cd ..
else
echo -e "${GREEN}βœ… llama.cpp already exists${NC}"
fi
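# Helper (added here, not a llama.cpp tool): locate a built binary by name.
# CMake builds place tools under build/bin/, the legacy Makefile placed them in
# the repo root, and names have changed over time (quantize -> llama-quantize,
# main -> llama-cli), so try each candidate name in both locations.
find_llama_bin() {
    local name
    for name in "$@"; do
        if [ -x "$LLAMA_CPP_DIR/build/bin/$name" ]; then
            echo "$LLAMA_CPP_DIR/build/bin/$name"
            return 0
        elif [ -x "$LLAMA_CPP_DIR/$name" ]; then
            echo "$LLAMA_CPP_DIR/$name"
            return 0
        fi
    done
    return 1
}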
# Check required Python dependencies
echo -e "${BLUE}πŸ“¦ Checking Python dependencies...${NC}"
python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || {
echo -e "${YELLOW}⚠️ Installing missing dependencies...${NC}"
pip install torch transformers sentencepiece protobuf
}
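# Alternatively, llama.cpp ships a requirements file covering its converter
# scripts; installing from it is the more complete option:
#   pip install -r "$LLAMA_CPP_DIR/requirements.txt"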
# Convert model to GGUF
echo -e "${BLUE}πŸ”„ Converting to GGUF format...${NC}"
echo "This may take several minutes..."
# Method 1: Direct conversion (recommended)
if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then
echo -e "${GREEN}Using convert.py${NC}"
python3 "$LLAMA_CPP_DIR/convert.py" \
"$MODEL_DIR" \
--outtype f16 \
--outfile "$OUTPUT_FILE"
else
# Method 2: Convert via HF format (fallback)
echo -e "${YELLOW}Using alternative conversion method${NC}"
python3 -c "
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import sys
import os
print('Loading model...')
model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')
print('Saving in HF format...')
model.save_pretrained('./temp_hf_model', safe_serialization=True)
tokenizer.save_pretrained('./temp_hf_model')
print('Conversion to HF format complete')
"
# Then convert HF to GGUF
if [ -d "./temp_hf_model" ]; then
python3 "$LLAMA_CPP_DIR/convert.py" \
"./temp_hf_model" \
--outtype f16 \
--outfile "$OUTPUT_FILE"
rm -rf ./temp_hf_model
fi
fi
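# Note: f16 preserves the fine-tuned weights' precision; the converter scripts
# also accept --outtype f32 and --outtype q8_0 for other size/quality trade-offs.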
# Verify conversion
if [ -f "$OUTPUT_FILE" ]; then
FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
echo
echo -e "${GREEN}πŸŽ‰ Conversion successful!${NC}"
echo -e "${BLUE}πŸ“„ Output file: $OUTPUT_FILE${NC}"
echo -e "${BLUE}πŸ“Š File size: $FILE_SIZE${NC}"
    # Optional: Quantize to smaller sizes
    echo
    echo -e "${YELLOW}💡 Optional: Create quantized versions?${NC}"
    read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🔄 Creating Q4_K_M quantized version...${NC}"
        # The binary is llama-quantize in current builds, quantize in older ones
        QUANTIZE_BIN=$(find_llama_bin llama-quantize quantize) || {
            echo -e "${RED}❌ quantize binary not found in $LLAMA_CPP_DIR${NC}"
            exit 1
        }
        "$QUANTIZE_BIN" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M
        if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
            QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1)
            echo -e "${GREEN}✅ Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}"
        fi
    fi
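    # Other presets the quantize tool supports include Q5_K_M (larger, higher
    # quality) and Q8_0 (near-F16 quality at roughly half the size).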
    # Test the converted model
    echo
    echo -e "${YELLOW}🧪 Test the converted model?${NC}"
    read -p "Run a quick test? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🧪 Testing model...${NC}"
        echo "Prompt: 'Hello, how are you?'"
        echo "Response:"
        # The binary is llama-cli in current builds, main in older ones;
        # -n caps generation at 50 tokens, --temp sets sampling temperature
        MAIN_BIN=$(find_llama_bin llama-cli main) || {
            echo -e "${RED}❌ llama-cli/main binary not found in $LLAMA_CPP_DIR${NC}"
            exit 1
        }
        "$MAIN_BIN" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7
    fi
else
    echo -e "${RED}❌ Conversion failed!${NC}"
    echo "Check the error messages above."
    exit 1
fi
# Instructions for next steps
echo
echo -e "${GREEN}🎯 Next Steps:${NC}"
echo "1. Create Ollama Modelfile:"
echo " python create_custom_model.py # option 4"
echo
echo "2. Import to Ollama:"
echo " ollama create my-custom-model -f Modelfile"
echo
echo "3. Test in Ollama:"
echo " ollama run my-custom-model \"Hello!\""
echo
echo "4. Push to Ollama Library:"
echo " ollama push my-custom-model"
echo
echo -e "${BLUE}πŸ“š Files created:${NC}"
echo " β€’ $OUTPUT_FILE (F16 version)"
if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
echo " β€’ ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)"
fi
echo
echo -e "${GREEN}πŸŽ‰ GGUF conversion completed successfully!${NC}"