Spaces:

saemstunes
/

STA-AI

Running

App Files Files Community

STA-AI / convert_phi35_complete.sh

saemstunes

Create convert_phi35_complete.sh

e7e5eda verified 5 months ago

raw

history blame contribute delete

6.6 kB

	#!/bin/bash
	set -e

	echo "🚀 COMPREHENSIVE PHI-3.5-MINI CONVERSION & QUANTIZATION SYSTEM"
	echo "================================================================"

	# Configuration
	MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
	OUTPUT_DIR="./phi3.5-mini-gguf-complete"
	QUANT_TYPES=("Q2_K" "Q3_K_S" "Q3_K_M" "Q3_K_L" "Q4_0" "Q4_1" "Q4_K_S" "Q4_K_M" "Q5_0" "Q5_1" "Q5_K_S" "Q5_K_M" "Q6_K" "Q8_0" "F16" "F32")

	# Create directories
	mkdir -p $OUTPUT_DIR
	mkdir -p $OUTPUT_DIR/artifacts
	mkdir -p $OUTPUT_DIR/logs

	# Install system dependencies
	echo "📦 INSTALLING SYSTEM DEPENDENCIES"
	sudo apt-get update
	sudo apt-get install -y \
	build-essential \
	cmake \
	git \
	python3 \
	python3-pip \
	python3-venv \
	libcurl4-openssl-dev \
	git-lfs \
	wget \
	curl \
	pkg-config \
	libopenblas-dev \
	libatomic-dev

	# Initialize Git LFS
	git lfs install

	# Create Python virtual environment
	python3 -m venv phi3-convert
	source phi3-convert/bin/activate

	# Install Python dependencies
	pip install --upgrade pip
	pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
	pip install transformers>=4.43.0
	pip install safetensors
	pip install huggingface-hub
	pip install accelerate
	pip install protobuf
	pip install sentencepiece
	pip install ninja
	pip install requests
	pip install tqdm
	pip install psutil
	pip install GPUtil

	# Clone and build llama.cpp with all optimizations
	echo "🔨 BUILDING LLAMA.CPP WITH FULL OPTIMIZATIONS"
	if [ ! -d "llama.cpp" ]; then
	git clone https://github.com/ggerganov/llama.cpp
	fi

	cd llama.cpp

	# Clean build directory
	rm -rf build
	mkdir -p build
	cd build

	# Configure with maximum optimizations
	cmake .. \
	-DCMAKE_BUILD_TYPE=Release \
	-DLLAMA_NATIVE=ON \
	-DLLAMA_CURL=ON \
	-DLLAMA_OPENBLAS=ON \
	-DLLAMA_BLAS=ON \
	-DBLAS_LIBRARIES=/usr/lib/openblas-base/libopenblas.a \
	-DLLAMA_CUDA=OFF \
	-DLLAMA_METAL=OFF \
	-DLLAMA_MPI=OFF \
	-DLLAMA_CCACHE=ON \
	-DLLAMA_BUILD_TESTS=ON \
	-DLLAMA_BUILD_EXAMPLES=ON

	# Build with maximum parallelism
	make -j$(nproc)

	# Return to main directory
	cd ../..

	# Download the model
	echo "📥 DOWNLOADING PHI-3.5-MINI-INSTRUCT MODEL"
	if [ ! -d "Phi-3.5-mini-instruct" ]; then
	git clone https://huggingface.co/microsoft/Phi-3.5-mini-instruct
	fi

	# Verify model files
	echo "🔍 VERIFYING MODEL FILES"
	cd Phi-3.5-mini-instruct
	ls -la
	cd ..

	# Create comprehensive conversion script
	cat > complete_conversion.py << 'EOF'
	import os
	import sys
	import subprocess
	import json
	import time
	from datetime import datetime

	class Phi3Converter:
	def __init__(self, model_path, output_dir):
	self.model_path = model_path
	self.output_dir = output_dir
	self.quant_types = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1",
	"Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M",
	"Q6_K", "Q8_0", "F16", "F32"]

	def run_command(self, cmd, description):
	print(f"🔄 {description}")
	print(f" Command: {cmd}")

	start_time = time.time()
	result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
	end_time = time.time()

	if result.returncode != 0:
	print(f"❌ Error: {result.stderr}")
	return False

	print(f"✅ Completed in {end_time - start_time:.2f}s")
	return True

	def convert_to_gguf(self, outtype="f16"):
	output_file = f"{self.output_dir}/phi3.5-mini.{outtype}.gguf"
	cmd = f"python3 llama.cpp/convert-hf-to-gguf.py {self.model_path} --outfile {output_file} --outtype {outtype}"

	return self.run_command(cmd, f"Converting to GGUF ({outtype})")

	def quantize_model(self, input_file, quant_type):
	output_file = f"{self.output_dir}/phi3.5-mini.{quant_type}.gguf"
	cmd = f"./llama.cpp/build/bin/quantize {input_file} {output_file} {quant_type}"

	return self.run_command(cmd, f"Quantizing to {quant_type}")

	def generate_model_card(self):
	model_card = {
	"model_name": "Phi-3.5-mini-instruct-GGUF-Comprehensive",
	"base_model": "microsoft/Phi-3.5-mini-instruct",
	"description": "Comprehensive GGUF quantization of Phi-3.5-mini-instruct for Saem's Tunes",
	"quantizations": self.quant_types,
	"conversion_date": datetime.now().isoformat(),
	"file_sizes": {},
	"recommended_use_cases": {
	"Q2_K": "Mobile devices, extremely low memory",
	"Q3_K_S": "Mobile devices, low memory",
	"Q4_0": "Good balance for most use cases",
	"Q4_K_M": "Recommended balance (quality/speed)",
	"Q5_K_M": "High quality with reasonable size",
	"Q8_0": "Maximum quality, larger size",
	"F16": "Original quality, large size"
	}
	}

	with open(f"{self.output_dir}/model_card.json", "w") as f:
	json.dump(model_card, f, indent=2)

	def run_comprehensive_conversion(self):
	print("🚀 STARTING COMPREHENSIVE CONVERSION PIPELINE")

	# Convert to F16 first
	if not self.convert_to_gguf("f16"):
	return False

	base_model = f"{self.output_dir}/phi3.5-mini.f16.gguf"

	# Create all quantization variants
	for quant_type in self.quant_types:
	if quant_type not in ["F16", "F32"]:
	self.quantize_model(base_model, quant_type)

	# Convert to F32 separately
	self.convert_to_gguf("f32")

	# Generate model card
	self.generate_model_card()

	print("🎉 COMPREHENSIVE CONVERSION COMPLETED!")
	return True

	if __name__ == "__main__":
	converter = Phi3Converter("Phi-3.5-mini-instruct", "phi3.5-mini-gguf-complete")
	converter.run_comprehensive_conversion()
	EOF

	# Run the comprehensive conversion
	echo "🏃 STARTING COMPREHENSIVE CONVERSION PROCESS"
	python3 complete_conversion.py

	# Generate file manifest
	echo "📊 GENERATING FILE MANIFEST"
	ls -la phi3.5-mini-gguf-complete/ > phi3.5-mini-gguf-complete/file_manifest.txt

	# Create deployment package
	echo "📦 CREATING DEPLOYMENT PACKAGE"
	tar -czvf phi3.5-mini-complete-gguf-$(date +%Y%m%d).tar.gz phi3.5-mini-gguf-complete/

	echo "🎉 COMPLETE CONVERSION SYSTEM FINISHED!"
	echo "📍 Output directory: phi3.5-mini-gguf-complete/"
	echo "📁 Total files generated: $(ls -1 phi3.5-mini-gguf-complete/*.gguf \| wc -l)"