#!/bin/bash
#
# convert_phi35_complete.sh
#
# Downloads microsoft/Phi-3.5-mini-instruct, builds llama.cpp, converts the
# model to GGUF, quantizes it to several formats and packages the results.
# Uses sudo/apt-get, so it expects a Debian/Ubuntu-style host.
#
# Repository-page header that preceded the shebang (kept for provenance; as
# bare text it was not valid shell, and its apostrophe opened an unterminated
# quote):
#   STA-AI / convert_phi35_complete.sh
#   saemstunes's picture
#   Create convert_phi35_complete.sh
#   e7e5eda verified

# Abort on a failing command, on use of an unset variable, and when any stage
# of a pipeline fails.
set -euo pipefail

echo "🚀 COMPREHENSIVE PHI-3.5-MINI CONVERSION & QUANTIZATION SYSTEM"
echo "================================================================"

# Configuration
readonly MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
readonly OUTPUT_DIR="./phi3.5-mini-gguf-complete"
# Quantization formats of interest. The generated complete_conversion.py
# defines the same list itself, so this array is informational here.
# shellcheck disable=SC2034
readonly -a QUANT_TYPES=(
  "Q2_K" "Q3_K_S" "Q3_K_M" "Q3_K_L"
  "Q4_0" "Q4_1" "Q4_K_S" "Q4_K_M"
  "Q5_0" "Q5_1" "Q5_K_S" "Q5_K_M"
  "Q6_K" "Q8_0" "F16" "F32"
)

# Create directories (-p also creates OUTPUT_DIR itself).
mkdir -p -- "${OUTPUT_DIR}/artifacts" "${OUTPUT_DIR}/logs"
# Install system dependencies
echo "📦 INSTALLING SYSTEM DEPENDENCIES"

# Toolchain, Python, and the libraries the llama.cpp build links against.
apt_packages=(
  build-essential
  cmake
  git
  python3
  python3-pip
  python3-venv
  libcurl4-openssl-dev
  git-lfs
  wget
  curl
  pkg-config
  libopenblas-dev
  # NOTE(review): this entry used to be `libatomic-dev`, which does not appear
  # to be a Debian/Ubuntu package name; a single unknown name makes the whole
  # `apt-get install` fail. `libatomic1` is the runtime library package, and
  # the link-time files are expected to come with GCC via build-essential.
  libatomic1
)

sudo apt-get update
sudo apt-get install -y "${apt_packages[@]}"

# Initialize Git LFS so large model files are fetched on clone.
git lfs install
# Create Python virtual environment
python3 -m venv phi3-convert
# Activating puts the venv's python3/pip first on PATH for every later step.
# shellcheck disable=SC1091  # the activate script only exists at run time
source phi3-convert/bin/activate

# Install Python dependencies
pip install --upgrade pip

# CPU-only PyTorch wheels are served from PyTorch's own package index.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# The version specifier MUST be quoted: unquoted, the shell parses
# `transformers>=4.43.0` as `pip install transformers` with stdout redirected
# to a file named `=4.43.0`, silently dropping the version constraint.
pip install \
  "transformers>=4.43.0" \
  safetensors \
  huggingface-hub \
  accelerate \
  protobuf \
  sentencepiece \
  ninja \
  requests \
  tqdm \
  psutil \
  GPUtil
# Clone and build llama.cpp with all optimizations
echo "🔨 BUILDING LLAMA.CPP WITH FULL OPTIMIZATIONS"
# NOTE: the clone is unpinned, so the build tracks whatever the default branch
# currently is. An already-present llama.cpp directory is reused as-is.
if [[ ! -d "llama.cpp" ]]; then
  git clone https://github.com/ggerganov/llama.cpp
fi

# Always configure from a clean build tree.
rm -rf llama.cpp/build

# Configure out-of-tree with -S/-B so the working directory never changes
# (no `cd` / `cd ../..` pair that can leave the script in the wrong place).
#
# Backend switches use the GGML_* option names from the current llama.cpp
# build documentation. The previous flag set is not carried over because:
#   - LLAMA_BLAS / LLAMA_CCACHE are pre-rename names; LLAMA_OPENBLAS and
#     LLAMA_MPI are not CMake options of current llama.cpp, so CMake would
#     ignore them and BLAS would silently stay disabled;
#   - BLAS_LIBRARIES pointed at a fixed static-library path; with
#     GGML_BLAS_VENDOR=OpenBLAS, CMake locates the library installed by
#     libopenblas-dev on its own.
# NOTE(review): confirm these names against the checked-out revision if
# llama.cpp is ever pinned to an older commit.
cmake -S llama.cpp -B llama.cpp/build \
  -DCMAKE_BUILD_TYPE=Release \
  -DGGML_NATIVE=ON \
  -DGGML_BLAS=ON \
  -DGGML_BLAS_VENDOR=OpenBLAS \
  -DGGML_CUDA=OFF \
  -DGGML_METAL=OFF \
  -DGGML_CCACHE=ON \
  -DLLAMA_CURL=ON \
  -DLLAMA_BUILD_TESTS=ON \
  -DLLAMA_BUILD_EXAMPLES=ON

# Build with maximum parallelism (generator-agnostic replacement for `make`).
cmake --build llama.cpp/build --config Release -j "$(nproc)"
# Download the model
echo "📥 DOWNLOADING PHI-3.5-MINI-INSTRUCT MODEL"
# Checkout directory: the last path component of MODEL_NAME, which is also
# the name `git clone` would pick by default.
model_dir="${MODEL_NAME##*/}"
if [[ ! -d "${model_dir}" ]]; then
  git clone "https://huggingface.co/${MODEL_NAME}" "${model_dir}"
fi

# Verify model files
echo "🔍 VERIFYING MODEL FILES"
ls -la -- "${model_dir}/"

# Fail here, with a clear message, rather than minutes later inside the
# converter: an interrupted clone leaves the directory in place, so the
# existence check above is not proof that the checkout is usable.
if [[ ! -f "${model_dir}/config.json" ]]; then
  echo "ERROR: ${model_dir}/config.json is missing; the checkout looks incomplete." >&2
  exit 1
fi
if ! compgen -G "${model_dir}/*.safetensors" > /dev/null; then
  echo "ERROR: no *.safetensors weight files found in ${model_dir}." >&2
  exit 1
fi
# When Git LFS did not fetch the payload, each weight file is a tiny text
# stub instead of the real tensor data; anything under 1 KiB is treated as one.
lfs_stub="$(find "${model_dir}" -maxdepth 1 -name '*.safetensors' -size -1024c -print -quit)"
if [[ -n "${lfs_stub}" ]]; then
  echo "ERROR: ${lfs_stub} looks like a Git LFS pointer, not real weights." >&2
  echo "       Run 'git lfs pull' inside ${model_dir} and retry." >&2
  exit 1
fi
# Create comprehensive conversion script
# The heredoc delimiter is quoted, so the Python below is written verbatim
# (no shell expansion). Python is indentation-sensitive: keep it intact.
cat > complete_conversion.py << 'EOF'
"""Convert a Hugging Face model checkout to GGUF and quantize it with llama.cpp.

Usage: python3 complete_conversion.py [MODEL_DIR [OUTPUT_DIR]]

Both arguments are optional and default to the directory names this pipeline
uses. llama.cpp is expected at ./llama.cpp, relative to the current directory.
The exit status is non-zero when the base F16 conversion fails.
"""
import json
import os
import shlex
import subprocess
import sys
import time
from datetime import datetime


class Phi3Converter:
    """Runs llama.cpp's converter and quantizer for one model checkout."""

    # llama.cpp has shipped these tools under two names; the current name is
    # tried first, then the legacy one.
    CONVERT_SCRIPTS = (
        "llama.cpp/convert_hf_to_gguf.py",
        "llama.cpp/convert-hf-to-gguf.py",
    )
    QUANTIZE_BINARIES = (
        "llama.cpp/build/bin/llama-quantize",
        "llama.cpp/build/bin/quantize",
    )

    def __init__(self, model_path, output_dir):
        """Remember the model checkout, the output directory and the formats."""
        self.model_path = model_path
        self.output_dir = output_dir
        self.quant_types = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1",
                            "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M",
                            "Q6_K", "Q8_0", "F16", "F32"]

    @staticmethod
    def _first_existing(candidates):
        """Return the first path in `candidates` that exists, or None."""
        return next((path for path in candidates if os.path.exists(path)), None)

    def run_command(self, cmd, description):
        """Run `cmd`, print its duration, and return True on exit status 0.

        `cmd` may be an argv list (executed directly, so paths containing
        spaces or shell metacharacters are safe) or a string (executed
        through the shell).
        """
        use_shell = isinstance(cmd, str)
        shown = cmd if use_shell else " ".join(shlex.quote(str(arg)) for arg in cmd)
        print(f"🔄 {description}")
        print(f" Command: {shown}")
        start_time = time.time()
        try:
            result = subprocess.run(cmd, shell=use_shell, capture_output=True, text=True)
        except OSError as exc:
            # The program could not be started at all (missing, not executable).
            print(f"❌ Error: {exc}")
            return False
        elapsed = time.time() - start_time
        if result.returncode != 0:
            print(f"❌ Error: {result.stderr}")
            return False
        print(f"✅ Completed in {elapsed:.2f}s")
        return True

    def convert_to_gguf(self, outtype="f16"):
        """Convert the checkout to `<output_dir>/phi3.5-mini.<outtype>.gguf`."""
        script = self._first_existing(self.CONVERT_SCRIPTS)
        if script is None:
            print(f"❌ Error: converter script not found (tried: {', '.join(self.CONVERT_SCRIPTS)})")
            return False
        os.makedirs(self.output_dir, exist_ok=True)
        output_file = f"{self.output_dir}/phi3.5-mini.{outtype}.gguf"
        # Use the interpreter running this script so the converter sees the
        # same installed packages.
        cmd = [sys.executable or "python3", script, self.model_path,
               "--outfile", output_file, "--outtype", outtype]
        return self.run_command(cmd, f"Converting to GGUF ({outtype})")

    def quantize_model(self, input_file, quant_type):
        """Quantize `input_file` to `<output_dir>/phi3.5-mini.<quant_type>.gguf`."""
        binary = self._first_existing(self.QUANTIZE_BINARIES)
        if binary is None:
            print(f"❌ Error: quantize binary not found (tried: {', '.join(self.QUANTIZE_BINARIES)})")
            return False
        output_file = f"{self.output_dir}/phi3.5-mini.{quant_type}.gguf"
        cmd = [binary, input_file, output_file, quant_type]
        return self.run_command(cmd, f"Quantizing to {quant_type}")

    def generate_model_card(self):
        """Write `<output_dir>/model_card.json` describing the produced files."""
        os.makedirs(self.output_dir, exist_ok=True)
        # Size in bytes of every .gguf file actually present, keyed by file name.
        file_sizes = {
            name: os.path.getsize(os.path.join(self.output_dir, name))
            for name in sorted(os.listdir(self.output_dir))
            if name.endswith(".gguf")
        }
        model_card = {
            "model_name": "Phi-3.5-mini-instruct-GGUF-Comprehensive",
            "base_model": "microsoft/Phi-3.5-mini-instruct",
            "description": "Comprehensive GGUF quantization of Phi-3.5-mini-instruct for Saem's Tunes",
            "quantizations": self.quant_types,
            "conversion_date": datetime.now().isoformat(),
            "file_sizes": file_sizes,
            "recommended_use_cases": {
                "Q2_K": "Mobile devices, extremely low memory",
                "Q3_K_S": "Mobile devices, low memory",
                "Q4_0": "Good balance for most use cases",
                "Q4_K_M": "Recommended balance (quality/speed)",
                "Q5_K_M": "High quality with reasonable size",
                "Q8_0": "Maximum quality, larger size",
                "F16": "Original quality, large size"
            }
        }
        with open(f"{self.output_dir}/model_card.json", "w") as f:
            json.dump(model_card, f, indent=2)

    def run_comprehensive_conversion(self):
        """Run the whole pipeline.

        Returns False when the base F16 conversion fails, because nothing else
        can be produced without it. Failures of individual variants are
        best-effort: they are reported and the pipeline carries on.
        """
        print("🚀 STARTING COMPREHENSIVE CONVERSION PIPELINE")
        # Convert to F16 first; every quantized variant is derived from it.
        if not self.convert_to_gguf("f16"):
            return False
        base_model = f"{self.output_dir}/phi3.5-mini.f16.gguf"

        failed = []
        # F16 and F32 come from the converter, not the quantizer.
        for quant_type in self.quant_types:
            if quant_type in ("F16", "F32"):
                continue
            if not self.quantize_model(base_model, quant_type):
                failed.append(quant_type)

        # Convert to F32 separately
        if not self.convert_to_gguf("f32"):
            failed.append("F32")

        # Generate model card
        self.generate_model_card()

        if failed:
            print(f"❌ Error: these variants were not produced: {', '.join(failed)}")
        print("🎉 COMPREHENSIVE CONVERSION COMPLETED!")
        return True


def main(argv):
    """Entry point; returns the process exit status."""
    model_path = argv[1] if len(argv) > 1 else "Phi-3.5-mini-instruct"
    output_dir = argv[2] if len(argv) > 2 else "phi3.5-mini-gguf-complete"
    converter = Phi3Converter(model_path, output_dir)
    return 0 if converter.run_comprehensive_conversion() else 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))
EOF
# Run the comprehensive conversion
echo "🏃 STARTING COMPREHENSIVE CONVERSION PROCESS"
python3 complete_conversion.py

# Directory holding the results. The leading "./" of OUTPUT_DIR is dropped so
# archive member names stay "phi3.5-mini-gguf-complete/...".
package_dir="${OUTPUT_DIR#./}"

# Generate file manifest
echo "📊 GENERATING FILE MANIFEST"
ls -la -- "${package_dir}/" > "${package_dir}/file_manifest.txt"

# Create deployment package (date-stamped, e.g. ...-20250131.tar.gz)
echo "📦 CREATING DEPLOYMENT PACKAGE"
archive="phi3.5-mini-complete-gguf-$(date +%Y%m%d).tar.gz"
tar -czvf "${archive}" "${package_dir}/"

# Count the produced GGUF files with find rather than by parsing `ls` over a
# glob, which prints an error when nothing matches.
gguf_count="$(find "${package_dir}" -maxdepth 1 -type f -name '*.gguf' | wc -l)"

echo "🎉 COMPLETE CONVERSION SYSTEM FINISHED!"
echo "📁 Output directory: ${package_dir}/"
echo "📝 Total files generated: ${gguf_count}"