saemstunes committed on
Commit
e7e5eda
Β·
verified Β·
1 Parent(s): bcc17fb

Create convert_phi35_complete.sh

Browse files
Files changed (1) hide show
  1. convert_phi35_complete.sh +211 -0
convert_phi35_complete.sh ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ echo "πŸš€ COMPREHENSIVE PHI-3.5-MINI CONVERSION & QUANTIZATION SYSTEM"
5
+ echo "================================================================"
6
+
7
+ # Configuration
8
+ MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
9
+ OUTPUT_DIR="./phi3.5-mini-gguf-complete"
10
+ QUANT_TYPES=("Q2_K" "Q3_K_S" "Q3_K_M" "Q3_K_L" "Q4_0" "Q4_1" "Q4_K_S" "Q4_K_M" "Q5_0" "Q5_1" "Q5_K_S" "Q5_K_M" "Q6_K" "Q8_0" "F16" "F32")
11
+
12
+ # Create directories
13
+ mkdir -p $OUTPUT_DIR
14
+ mkdir -p $OUTPUT_DIR/artifacts
15
+ mkdir -p $OUTPUT_DIR/logs
16
+
17
+ # Install system dependencies
18
+ echo "πŸ“¦ INSTALLING SYSTEM DEPENDENCIES"
19
+ sudo apt-get update
20
+ sudo apt-get install -y \
21
+ build-essential \
22
+ cmake \
23
+ git \
24
+ python3 \
25
+ python3-pip \
26
+ python3-venv \
27
+ libcurl4-openssl-dev \
28
+ git-lfs \
29
+ wget \
30
+ curl \
31
+ pkg-config \
32
+ libopenblas-dev \
33
+ libatomic-dev
34
+
35
+ # Initialize Git LFS
36
+ git lfs install
37
+
38
+ # Create Python virtual environment
39
+ python3 -m venv phi3-convert
40
+ source phi3-convert/bin/activate
41
+
42
+ # Install Python dependencies
43
+ pip install --upgrade pip
44
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
45
+ pip install transformers>=4.43.0
46
+ pip install safetensors
47
+ pip install huggingface-hub
48
+ pip install accelerate
49
+ pip install protobuf
50
+ pip install sentencepiece
51
+ pip install ninja
52
+ pip install requests
53
+ pip install tqdm
54
+ pip install psutil
55
+ pip install GPUtil
56
+
57
+ # Clone and build llama.cpp with all optimizations
58
+ echo "πŸ”¨ BUILDING LLAMA.CPP WITH FULL OPTIMIZATIONS"
59
+ if [ ! -d "llama.cpp" ]; then
60
+ git clone https://github.com/ggerganov/llama.cpp
61
+ fi
62
+
63
+ cd llama.cpp
64
+
65
+ # Clean build directory
66
+ rm -rf build
67
+ mkdir -p build
68
+ cd build
69
+
70
+ # Configure with maximum optimizations
71
+ cmake .. \
72
+ -DCMAKE_BUILD_TYPE=Release \
73
+ -DLLAMA_NATIVE=ON \
74
+ -DLLAMA_CURL=ON \
75
+ -DLLAMA_OPENBLAS=ON \
76
+ -DLLAMA_BLAS=ON \
77
+ -DBLAS_LIBRARIES=/usr/lib/openblas-base/libopenblas.a \
78
+ -DLLAMA_CUDA=OFF \
79
+ -DLLAMA_METAL=OFF \
80
+ -DLLAMA_MPI=OFF \
81
+ -DLLAMA_CCACHE=ON \
82
+ -DLLAMA_BUILD_TESTS=ON \
83
+ -DLLAMA_BUILD_EXAMPLES=ON
84
+
85
+ # Build with maximum parallelism
86
+ make -j$(nproc)
87
+
88
+ # Return to main directory
89
+ cd ../..
90
+
91
+ # Download the model
92
+ echo "πŸ“₯ DOWNLOADING PHI-3.5-MINI-INSTRUCT MODEL"
93
+ if [ ! -d "Phi-3.5-mini-instruct" ]; then
94
+ git clone https://huggingface.co/microsoft/Phi-3.5-mini-instruct
95
+ fi
96
+
97
+ # Verify model files
98
+ echo "πŸ” VERIFYING MODEL FILES"
99
+ cd Phi-3.5-mini-instruct
100
+ ls -la
101
+ cd ..
102
+
103
# Create the comprehensive conversion driver (embedded Python, written to disk
# via a quoted heredoc so no shell expansion happens inside).
cat > complete_conversion.py << 'EOF'
"""Convert Phi-3.5-mini-instruct to GGUF and produce all quantization variants."""
import os
import sys
import subprocess
import json
import time
from datetime import datetime

class Phi3Converter:
    """Drives HF->GGUF conversion and quantization via llama.cpp tooling."""

    def __init__(self, model_path, output_dir):
        self.model_path = model_path
        self.output_dir = output_dir
        self.quant_types = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1",
                            "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M",
                            "Q6_K", "Q8_0", "F16", "F32"]

    def _find_convert_script(self):
        # llama.cpp renamed convert-hf-to-gguf.py -> convert_hf_to_gguf.py
        # in mid-2024; probe for whichever exists in the checkout.
        candidates = ["llama.cpp/convert_hf_to_gguf.py",
                      "llama.cpp/convert-hf-to-gguf.py"]
        for cand in candidates:
            if os.path.exists(cand):
                return cand
        return candidates[0]

    def _find_quantize_binary(self):
        # The quantizer binary was renamed quantize -> llama-quantize;
        # probe for whichever the build produced.
        candidates = ["./llama.cpp/build/bin/llama-quantize",
                      "./llama.cpp/build/bin/quantize"]
        for cand in candidates:
            if os.path.exists(cand):
                return cand
        return candidates[0]

    def run_command(self, cmd, description):
        """Run a shell command, log timing; return True on success."""
        print(f"🔄 {description}")
        print(f"   Command: {cmd}")

        start_time = time.time()
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        end_time = time.time()

        if result.returncode != 0:
            print(f"❌ Error: {result.stderr}")
            return False

        print(f"✅ Completed in {end_time - start_time:.2f}s")
        return True

    def convert_to_gguf(self, outtype="f16"):
        """Convert the HF checkpoint to a GGUF file of the given outtype."""
        output_file = f"{self.output_dir}/phi3.5-mini.{outtype}.gguf"
        script = self._find_convert_script()
        cmd = f"python3 {script} {self.model_path} --outfile {output_file} --outtype {outtype}"

        return self.run_command(cmd, f"Converting to GGUF ({outtype})")

    def quantize_model(self, input_file, quant_type):
        """Quantize a base GGUF file into the requested quantization type."""
        output_file = f"{self.output_dir}/phi3.5-mini.{quant_type}.gguf"
        binary = self._find_quantize_binary()
        cmd = f"{binary} {input_file} {output_file} {quant_type}"

        return self.run_command(cmd, f"Quantizing to {quant_type}")

    def generate_model_card(self):
        """Write model_card.json describing all produced quantizations."""
        model_card = {
            "model_name": "Phi-3.5-mini-instruct-GGUF-Comprehensive",
            "base_model": "microsoft/Phi-3.5-mini-instruct",
            "description": "Comprehensive GGUF quantization of Phi-3.5-mini-instruct for Saem's Tunes",
            "quantizations": self.quant_types,
            "conversion_date": datetime.now().isoformat(),
            "file_sizes": {},
            "recommended_use_cases": {
                "Q2_K": "Mobile devices, extremely low memory",
                "Q3_K_S": "Mobile devices, low memory",
                "Q4_0": "Good balance for most use cases",
                "Q4_K_M": "Recommended balance (quality/speed)",
                "Q5_K_M": "High quality with reasonable size",
                "Q8_0": "Maximum quality, larger size",
                "F16": "Original quality, large size"
            }
        }

        # Record actual on-disk sizes of every produced GGUF file
        # (the field was previously declared but never populated).
        if os.path.isdir(self.output_dir):
            for fname in sorted(os.listdir(self.output_dir)):
                if fname.endswith(".gguf"):
                    path = os.path.join(self.output_dir, fname)
                    model_card["file_sizes"][fname] = os.path.getsize(path)

        with open(f"{self.output_dir}/model_card.json", "w") as f:
            json.dump(model_card, f, indent=2)

    def run_comprehensive_conversion(self):
        """F16 base conversion, then all quantizations, F32, and model card."""
        print("🚀 STARTING COMPREHENSIVE CONVERSION PIPELINE")

        # Convert to F16 first; everything else quantizes from this base.
        if not self.convert_to_gguf("f16"):
            return False

        base_model = f"{self.output_dir}/phi3.5-mini.f16.gguf"

        # Create all quantization variants (failures are logged, not fatal).
        for quant_type in self.quant_types:
            if quant_type not in ["F16", "F32"]:
                self.quantize_model(base_model, quant_type)

        # Convert to F32 separately (direct conversion, not quantization).
        self.convert_to_gguf("f32")

        # Generate model card
        self.generate_model_card()

        print("🎉 COMPREHENSIVE CONVERSION COMPLETED!")
        return True

if __name__ == "__main__":
    converter = Phi3Converter("Phi-3.5-mini-instruct", "phi3.5-mini-gguf-complete")
    converter.run_comprehensive_conversion()
EOF
# Run the comprehensive conversion
echo "🏃 STARTING COMPREHENSIVE CONVERSION PROCESS"
python3 complete_conversion.py

# Generate file manifest
echo "📊 GENERATING FILE MANIFEST"
ls -la phi3.5-mini-gguf-complete/ > phi3.5-mini-gguf-complete/file_manifest.txt

# Create deployment package (quote the filename in case of odd IFS settings)
echo "📦 CREATING DEPLOYMENT PACKAGE"
tar -czvf "phi3.5-mini-complete-gguf-$(date +%Y%m%d).tar.gz" phi3.5-mini-gguf-complete/

echo "🎉 COMPLETE CONVERSION SYSTEM FINISHED!"
echo "📁 Output directory: phi3.5-mini-gguf-complete/"
# Count .gguf files with find: `ls -1 dir/*.gguf | wc -l` prints an error and
# counts the unexpanded glob as 1 when no .gguf file exists.
GGUF_COUNT=$(find phi3.5-mini-gguf-complete -maxdepth 1 -name '*.gguf' | wc -l)
echo "📁 Total files generated: $GGUF_COUNT"