File size: 6,602 Bytes
e7e5eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/bin/bash
# Comprehensive Phi-3.5-mini -> GGUF conversion & quantization pipeline.
# -e: abort on error; -u: abort on unset variables; -o pipefail: a failing
# stage in a pipeline fails the whole pipeline instead of being masked.
set -euo pipefail

echo "πŸš€ COMPREHENSIVE PHI-3.5-MINI CONVERSION & QUANTIZATION SYSTEM"
echo "================================================================"

# Configuration
# NOTE(review): MODEL_NAME is currently unused (the clone below hard-codes the
# repo URL); kept for documentation / future parameterization.
MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
OUTPUT_DIR="./phi3.5-mini-gguf-complete"
QUANT_TYPES=("Q2_K" "Q3_K_S" "Q3_K_M" "Q3_K_L" "Q4_0" "Q4_1" "Q4_K_S" "Q4_K_M" "Q5_0" "Q5_1" "Q5_K_S" "Q5_K_M" "Q6_K" "Q8_0" "F16" "F32")

# Create output tree (paths quoted so a space in OUTPUT_DIR cannot split words)
mkdir -p "$OUTPUT_DIR" "$OUTPUT_DIR/artifacts" "$OUTPUT_DIR/logs"

# Install system build/runtime dependencies (compiler, cmake, BLAS, git-lfs).
echo "πŸ“¦ INSTALLING SYSTEM DEPENDENCIES"
sudo apt-get update
sudo apt-get install -y \
    build-essential \
    cmake \
    git \
    python3 \
    python3-pip \
    python3-venv \
    libcurl4-openssl-dev \
    git-lfs \
    wget \
    curl \
    pkg-config \
    libopenblas-dev \
    libatomic-dev

# Initialize Git LFS so the model clone pulls real weight files, not pointers.
git lfs install

# Create and activate an isolated Python environment for the conversion tools.
python3 -m venv phi3-convert
source phi3-convert/bin/activate

# Install Python dependencies.
# BUGFIX: the version constraint MUST be quoted — an unquoted
# `transformers>=4.43.0` is parsed by the shell as `pip install transformers`
# with stdout redirected to a file literally named `=4.43.0`, silently
# dropping the version pin.
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install "transformers>=4.43.0" \
    safetensors \
    huggingface-hub \
    accelerate \
    protobuf \
    sentencepiece \
    ninja \
    requests \
    tqdm \
    psutil \
    GPUtil
# Clone and build llama.cpp with CPU optimizations enabled.
echo "πŸ”¨ BUILDING LLAMA.CPP WITH FULL OPTIMIZATIONS"
if [ ! -d "llama.cpp" ]; then
    git clone https://github.com/ggerganov/llama.cpp
fi

cd llama.cpp

# Always start from a clean out-of-source build directory.
rm -rf build
mkdir -p build
cd build

# Configure with maximum optimizations.
# NOTE(review): upstream llama.cpp renamed its backend CMake options from
# LLAMA_* to GGML_* (LLAMA_NATIVE -> GGML_NATIVE, LLAMA_OPENBLAS ->
# GGML_BLAS + GGML_BLAS_VENDOR=OpenBLAS, LLAMA_CUDA -> GGML_CUDA) and
# removed LLAMA_MPI; the names below target current master. The previous
# hard-coded -DBLAS_LIBRARIES=/usr/lib/openblas-base/libopenblas.a path does
# not exist on most systems — letting CMake/pkg-config locate OpenBLAS
# (libopenblas-dev installed above) is the reliable route.
cmake .. \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_NATIVE=ON \
    -DLLAMA_CURL=ON \
    -DGGML_BLAS=ON \
    -DGGML_BLAS_VENDOR=OpenBLAS \
    -DGGML_CUDA=OFF \
    -DGGML_METAL=OFF \
    -DGGML_CCACHE=ON \
    -DLLAMA_BUILD_TESTS=ON \
    -DLLAMA_BUILD_EXAMPLES=ON

# Build with maximum parallelism; `cmake --build` is generator-agnostic
# (works for Makefiles and Ninja alike).
cmake --build . --config Release -j"$(nproc)"

# Return to main directory
cd ../..

# Fetch the Hugging Face model repository (skipped when a local copy exists;
# git-lfs was initialized earlier so the weight files are pulled in full).
echo "πŸ“₯ DOWNLOADING PHI-3.5-MINI-INSTRUCT MODEL"
[ -d "Phi-3.5-mini-instruct" ] || git clone https://huggingface.co/microsoft/Phi-3.5-mini-instruct

# Basic sanity check: list what was downloaded. The subshell keeps the
# script's working directory unchanged.
echo "πŸ” VERIFYING MODEL FILES"
(
    cd Phi-3.5-mini-instruct
    ls -la
)

# Emit the Python conversion driver. The quoted 'EOF' delimiter disables
# shell expansion, so the heredoc body is written verbatim.
cat > complete_conversion.py << 'EOF'
"""Convert Phi-3.5-mini-instruct to GGUF and generate all quantization variants."""
import json
import os
import subprocess
import time
from datetime import datetime


class Phi3Converter:
    """Drives convert -> quantize -> model-card generation for one model."""

    def __init__(self, model_path, output_dir):
        self.model_path = model_path
        self.output_dir = output_dir
        self.quant_types = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1",
                           "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M",
                           "Q6_K", "Q8_0", "F16", "F32"]

    def run_command(self, cmd, description):
        """Run a shell command, print timing, and return True on success."""
        print(f"πŸ”„ {description}")
        print(f"   Command: {cmd}")

        start_time = time.time()
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        elapsed = time.time() - start_time

        if result.returncode != 0:
            print(f"❌ Error: {result.stderr}")
            return False

        print(f"βœ… Completed in {elapsed:.2f}s")
        return True

    def convert_to_gguf(self, outtype="f16"):
        """Convert the HF checkpoint to a GGUF file at the given precision."""
        output_file = f"{self.output_dir}/phi3.5-mini.{outtype}.gguf"
        # BUGFIX: upstream renamed convert-hf-to-gguf.py -> convert_hf_to_gguf.py;
        # fall back to the old name for older checkouts.
        script = "llama.cpp/convert_hf_to_gguf.py"
        if not os.path.exists(script):
            script = "llama.cpp/convert-hf-to-gguf.py"
        cmd = f"python3 {script} {self.model_path} --outfile {output_file} --outtype {outtype}"

        return self.run_command(cmd, f"Converting to GGUF ({outtype})")

    def quantize_model(self, input_file, quant_type):
        """Quantize an existing GGUF file to the requested type."""
        output_file = f"{self.output_dir}/phi3.5-mini.{quant_type}.gguf"
        # BUGFIX: upstream renamed the binary quantize -> llama-quantize;
        # fall back to the old name for older builds.
        binary = "./llama.cpp/build/bin/llama-quantize"
        if not os.path.exists(binary):
            binary = "./llama.cpp/build/bin/quantize"
        cmd = f"{binary} {input_file} {output_file} {quant_type}"

        return self.run_command(cmd, f"Quantizing to {quant_type}")

    def generate_model_card(self):
        """Write model_card.json describing every generated variant."""
        # Record the actual on-disk size of each generated GGUF file
        # (previously declared as an empty dict and never populated).
        file_sizes = {}
        for name in sorted(os.listdir(self.output_dir)):
            if name.endswith(".gguf"):
                path = os.path.join(self.output_dir, name)
                file_sizes[name] = os.path.getsize(path)

        model_card = {
            "model_name": "Phi-3.5-mini-instruct-GGUF-Comprehensive",
            "base_model": "microsoft/Phi-3.5-mini-instruct",
            "description": "Comprehensive GGUF quantization of Phi-3.5-mini-instruct for Saem's Tunes",
            "quantizations": self.quant_types,
            "conversion_date": datetime.now().isoformat(),
            "file_sizes": file_sizes,
            "recommended_use_cases": {
                "Q2_K": "Mobile devices, extremely low memory",
                "Q3_K_S": "Mobile devices, low memory",
                "Q4_0": "Good balance for most use cases",
                "Q4_K_M": "Recommended balance (quality/speed)",
                "Q5_K_M": "High quality with reasonable size",
                "Q8_0": "Maximum quality, larger size",
                "F16": "Original quality, large size"
            }
        }

        with open(f"{self.output_dir}/model_card.json", "w") as f:
            json.dump(model_card, f, indent=2)

    def run_comprehensive_conversion(self):
        """F16 conversion, then every quantized variant, then the model card."""
        print("πŸš€ STARTING COMPREHENSIVE CONVERSION PIPELINE")

        # The F16 GGUF is the base input for every quantized variant.
        if not self.convert_to_gguf("f16"):
            return False

        base_model = f"{self.output_dir}/phi3.5-mini.f16.gguf"

        # Create all quantization variants (F16/F32 come from the converter,
        # not the quantizer, so they are skipped here).
        for quant_type in self.quant_types:
            if quant_type not in ["F16", "F32"]:
                self.quantize_model(base_model, quant_type)

        # Convert to F32 separately
        self.convert_to_gguf("f32")

        # Generate model card
        self.generate_model_card()

        print("πŸŽ‰ COMPREHENSIVE CONVERSION COMPLETED!")
        return True


if __name__ == "__main__":
    converter = Phi3Converter("Phi-3.5-mini-instruct", "phi3.5-mini-gguf-complete")
    converter.run_comprehensive_conversion()
EOF

# Kick off the Python conversion pipeline generated above.
echo "πŸƒ STARTING COMPREHENSIVE CONVERSION PROCESS"
python3 complete_conversion.py

# Snapshot a listing of everything that was produced.
echo "πŸ“Š GENERATING FILE MANIFEST"
ls -la phi3.5-mini-gguf-complete/ > phi3.5-mini-gguf-complete/file_manifest.txt

# Bundle the whole output tree into a dated tarball.
echo "πŸ“¦ CREATING DEPLOYMENT PACKAGE"
stamp="$(date +%Y%m%d)"
tar -czvf "phi3.5-mini-complete-gguf-${stamp}.tar.gz" phi3.5-mini-gguf-complete/

echo "πŸŽ‰ COMPLETE CONVERSION SYSTEM FINISHED!"
echo "πŸ“ Output directory: phi3.5-mini-gguf-complete/"
echo "πŸ“ Total files generated: $(ls -1 phi3.5-mini-gguf-complete/*.gguf | wc -l)"