File size: 6,602 Bytes
e7e5eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/bin/bash
# Comprehensive Phi-3.5-mini -> GGUF conversion & quantization pipeline.
# -e: abort on error; -u: abort on unset variables; -o pipefail: a failing
# stage in a pipeline fails the whole pipeline instead of being masked.
set -euo pipefail

echo "πŸš€ COMPREHENSIVE PHI-3.5-MINI CONVERSION & QUANTIZATION SYSTEM"
echo "================================================================"

# Configuration
# NOTE(review): MODEL_NAME is currently unused (the clone below hard-codes the
# repo URL); kept for documentation / future parameterization.
MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
OUTPUT_DIR="./phi3.5-mini-gguf-complete"
QUANT_TYPES=("Q2_K" "Q3_K_S" "Q3_K_M" "Q3_K_L" "Q4_0" "Q4_1" "Q4_K_S" "Q4_K_M" "Q5_0" "Q5_1" "Q5_K_S" "Q5_K_M" "Q6_K" "Q8_0" "F16" "F32")

# Create output tree (paths quoted so a space in OUTPUT_DIR cannot split words)
mkdir -p "$OUTPUT_DIR" "$OUTPUT_DIR/artifacts" "$OUTPUT_DIR/logs"

# Install system build/runtime dependencies (compiler, cmake, BLAS, git-lfs).
echo "πŸ“¦ INSTALLING SYSTEM DEPENDENCIES"
sudo apt-get update
sudo apt-get install -y \
    build-essential \
    cmake \
    git \
    python3 \
    python3-pip \
    python3-venv \
    libcurl4-openssl-dev \
    git-lfs \
    wget \
    curl \
    pkg-config \
    libopenblas-dev \
    libatomic-dev

# Initialize Git LFS so the model clone pulls real weight files, not pointers.
git lfs install

# Create and activate an isolated Python environment for the conversion tools.
python3 -m venv phi3-convert
source phi3-convert/bin/activate

# Install Python dependencies.
# BUGFIX: the version constraint MUST be quoted — an unquoted
# `transformers>=4.43.0` is parsed by the shell as `pip install transformers`
# with stdout redirected to a file literally named `=4.43.0`, silently
# dropping the version pin.
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install "transformers>=4.43.0" \
    safetensors \
    huggingface-hub \
    accelerate \
    protobuf \
    sentencepiece \
    ninja \
    requests \
    tqdm \
    psutil \
    GPUtil
# Clone and build llama.cpp with CPU optimizations enabled.
echo "πŸ”¨ BUILDING LLAMA.CPP WITH FULL OPTIMIZATIONS"
if [ ! -d "llama.cpp" ]; then
    git clone https://github.com/ggerganov/llama.cpp
fi

cd llama.cpp

# Always start from a clean out-of-source build directory.
rm -rf build
mkdir -p build
cd build

# Configure with maximum optimizations.
# NOTE(review): upstream llama.cpp renamed its backend CMake options from
# LLAMA_* to GGML_* (LLAMA_NATIVE -> GGML_NATIVE, LLAMA_OPENBLAS ->
# GGML_BLAS + GGML_BLAS_VENDOR=OpenBLAS, LLAMA_CUDA -> GGML_CUDA) and
# removed LLAMA_MPI; the names below target current master. The previous
# hard-coded -DBLAS_LIBRARIES=/usr/lib/openblas-base/libopenblas.a path does
# not exist on most systems — letting CMake/pkg-config locate OpenBLAS
# (libopenblas-dev installed above) is the reliable route.
cmake .. \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_NATIVE=ON \
    -DLLAMA_CURL=ON \
    -DGGML_BLAS=ON \
    -DGGML_BLAS_VENDOR=OpenBLAS \
    -DGGML_CUDA=OFF \
    -DGGML_METAL=OFF \
    -DGGML_CCACHE=ON \
    -DLLAMA_BUILD_TESTS=ON \
    -DLLAMA_BUILD_EXAMPLES=ON

# Build with maximum parallelism; `cmake --build` is generator-agnostic
# (works for Makefiles and Ninja alike).
cmake --build . --config Release -j"$(nproc)"

# Return to main directory
cd ../..

# Fetch the Hugging Face model repository (skipped when a local copy exists;
# git-lfs was initialized earlier so the weight files are pulled in full).
echo "πŸ“₯ DOWNLOADING PHI-3.5-MINI-INSTRUCT MODEL"
[ -d "Phi-3.5-mini-instruct" ] || git clone https://huggingface.co/microsoft/Phi-3.5-mini-instruct

# Basic sanity check: list what was downloaded. The subshell keeps the
# script's working directory unchanged.
echo "πŸ” VERIFYING MODEL FILES"
(
    cd Phi-3.5-mini-instruct
    ls -la
)

# Emit the Python conversion driver. The quoted 'EOF' delimiter disables
# shell expansion, so the heredoc body is written verbatim.
cat > complete_conversion.py << 'EOF'
"""Convert Phi-3.5-mini-instruct to GGUF and generate all quantization variants."""
import json
import os
import subprocess
import time
from datetime import datetime


class Phi3Converter:
    """Drives convert -> quantize -> model-card generation for one model."""

    def __init__(self, model_path, output_dir):
        self.model_path = model_path
        self.output_dir = output_dir
        self.quant_types = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1",
                           "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M",
                           "Q6_K", "Q8_0", "F16", "F32"]

    def run_command(self, cmd, description):
        """Run a shell command, print timing, and return True on success."""
        print(f"πŸ”„ {description}")
        print(f"   Command: {cmd}")

        start_time = time.time()
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        elapsed = time.time() - start_time

        if result.returncode != 0:
            print(f"❌ Error: {result.stderr}")
            return False

        print(f"βœ… Completed in {elapsed:.2f}s")
        return True

    def convert_to_gguf(self, outtype="f16"):
        """Convert the HF checkpoint to a GGUF file at the given precision."""
        output_file = f"{self.output_dir}/phi3.5-mini.{outtype}.gguf"
        # BUGFIX: upstream renamed convert-hf-to-gguf.py -> convert_hf_to_gguf.py;
        # fall back to the old name for older checkouts.
        script = "llama.cpp/convert_hf_to_gguf.py"
        if not os.path.exists(script):
            script = "llama.cpp/convert-hf-to-gguf.py"
        cmd = f"python3 {script} {self.model_path} --outfile {output_file} --outtype {outtype}"

        return self.run_command(cmd, f"Converting to GGUF ({outtype})")

    def quantize_model(self, input_file, quant_type):
        """Quantize an existing GGUF file to the requested type."""
        output_file = f"{self.output_dir}/phi3.5-mini.{quant_type}.gguf"
        # BUGFIX: upstream renamed the binary quantize -> llama-quantize;
        # fall back to the old name for older builds.
        binary = "./llama.cpp/build/bin/llama-quantize"
        if not os.path.exists(binary):
            binary = "./llama.cpp/build/bin/quantize"
        cmd = f"{binary} {input_file} {output_file} {quant_type}"

        return self.run_command(cmd, f"Quantizing to {quant_type}")

    def generate_model_card(self):
        """Write model_card.json describing every generated variant."""
        # Record the actual on-disk size of each generated GGUF file
        # (previously declared as an empty dict and never populated).
        file_sizes = {}
        for name in sorted(os.listdir(self.output_dir)):
            if name.endswith(".gguf"):
                path = os.path.join(self.output_dir, name)
                file_sizes[name] = os.path.getsize(path)

        model_card = {
            "model_name": "Phi-3.5-mini-instruct-GGUF-Comprehensive",
            "base_model": "microsoft/Phi-3.5-mini-instruct",
            "description": "Comprehensive GGUF quantization of Phi-3.5-mini-instruct for Saem's Tunes",
            "quantizations": self.quant_types,
            "conversion_date": datetime.now().isoformat(),
            "file_sizes": file_sizes,
            "recommended_use_cases": {
                "Q2_K": "Mobile devices, extremely low memory",
                "Q3_K_S": "Mobile devices, low memory",
                "Q4_0": "Good balance for most use cases",
                "Q4_K_M": "Recommended balance (quality/speed)",
                "Q5_K_M": "High quality with reasonable size",
                "Q8_0": "Maximum quality, larger size",
                "F16": "Original quality, large size"
            }
        }

        with open(f"{self.output_dir}/model_card.json", "w") as f:
            json.dump(model_card, f, indent=2)

    def run_comprehensive_conversion(self):
        """F16 conversion, then every quantized variant, then the model card."""
        print("πŸš€ STARTING COMPREHENSIVE CONVERSION PIPELINE")

        # The F16 GGUF is the base input for every quantized variant.
        if not self.convert_to_gguf("f16"):
            return False

        base_model = f"{self.output_dir}/phi3.5-mini.f16.gguf"

        # Create all quantization variants (F16/F32 come from the converter,
        # not the quantizer, so they are skipped here).
        for quant_type in self.quant_types:
            if quant_type not in ["F16", "F32"]:
                self.quantize_model(base_model, quant_type)

        # Convert to F32 separately
        self.convert_to_gguf("f32")

        # Generate model card
        self.generate_model_card()

        print("πŸŽ‰ COMPREHENSIVE CONVERSION COMPLETED!")
        return True


if __name__ == "__main__":
    converter = Phi3Converter("Phi-3.5-mini-instruct", "phi3.5-mini-gguf-complete")
    converter.run_comprehensive_conversion()
EOF

# Kick off the Python conversion pipeline generated above.
echo "πŸƒ STARTING COMPREHENSIVE CONVERSION PROCESS"
python3 complete_conversion.py

# Snapshot a listing of everything that was produced.
echo "πŸ“Š GENERATING FILE MANIFEST"
ls -la phi3.5-mini-gguf-complete/ > phi3.5-mini-gguf-complete/file_manifest.txt

# Bundle the whole output tree into a dated tarball.
echo "πŸ“¦ CREATING DEPLOYMENT PACKAGE"
stamp="$(date +%Y%m%d)"
tar -czvf "phi3.5-mini-complete-gguf-${stamp}.tar.gz" phi3.5-mini-gguf-complete/

echo "πŸŽ‰ COMPLETE CONVERSION SYSTEM FINISHED!"
echo "πŸ“ Output directory: phi3.5-mini-gguf-complete/"
echo "πŸ“ Total files generated: $(ls -1 phi3.5-mini-gguf-complete/*.gguf | wc -l)"