Upload convert_to_gguf.py with huggingface_hub
Browse files · convert_to_gguf.py (+9, −5)
convert_to_gguf.py
CHANGED
|
@@ -51,21 +51,25 @@ print(f"✅ Merged model saved to {merged_dir}")
|
|
| 51 |
# Step 4: Install llama.cpp for conversion
|
| 52 |
print("\n📥 Installing llama.cpp for GGUF conversion...")
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
-
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
|
| 57 |
# Get number of processors
|
| 58 |
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
|
| 59 |
nproc = nproc_result.stdout.strip()
|
| 60 |
-
print(f"Building llama.cpp with {nproc} cores...")
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Step 5: Convert to GGUF format
|
| 64 |
print("\n🔄 Converting to GGUF format...")
|
| 65 |
subprocess.run([
|
| 66 |
"python3", "llama.cpp/convert_hf_to_gguf.py",
|
| 67 |
merged_dir,
|
| 68 |
-
"--outfile",
|
| 69 |
"--outtype", "f16"
|
| 70 |
], check=True)
|
| 71 |
print("✅ Converted to FP16 GGUF")
|
|
@@ -73,7 +77,7 @@ print("✅ Converted to FP16 GGUF")
|
|
| 73 |
# Step 6: Quantize to specified format
|
| 74 |
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
|
| 75 |
subprocess.run([
|
| 76 |
-
"./llama.cpp/llama-quantize",
|
| 77 |
"./model-f16.gguf",
|
| 78 |
f"./model-{QUANTIZATION}.gguf",
|
| 79 |
QUANTIZATION
|
|
|
|
# Step 4: Install llama.cpp for conversion
print("\n📥 Installing llama.cpp for GGUF conversion...")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential", "cmake"], check=True)
# Shallow clone: full history is not needed to build. Guard against re-runs —
# `git clone` exits non-zero if the target directory already exists, which
# would make check=True raise on the second execution of this script.
if not os.path.isdir("llama.cpp"):
    subprocess.run(
        ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp.git"],
        check=True,
    )

# Get number of processors so the build can run fully parallel.
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
nproc = nproc_result.stdout.strip()
print(f"Building llama.cpp with {nproc} cores using CMake...")

# Use CMake to build. -DGGML_CUDA=ON enables the CUDA backend — assumes a
# CUDA toolkit is installed on this machine; TODO confirm, otherwise the
# configure step will fail and the flag should be dropped.
os.makedirs("llama.cpp/build", exist_ok=True)
subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp", "-DGGML_CUDA=ON"], check=True)
subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j", nproc], check=True)
|
# Step 5: Convert to GGUF format
print("\n🔄 Converting to GGUF format...")
# Build the converter command explicitly, then run it: the merged HF
# checkpoint in `merged_dir` is written out as an FP16 GGUF file.
convert_cmd = [
    "python3",
    "llama.cpp/convert_hf_to_gguf.py",
    merged_dir,
    "--outfile", "./model-f16.gguf",
    "--outtype", "f16",
]
subprocess.run(convert_cmd, check=True)
print("✅ Converted to FP16 GGUF")
|
|
|
|
| 77 |
# Step 6: Quantize to specified format
|
| 78 |
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
|
| 79 |
subprocess.run([
|
| 80 |
+
"./llama.cpp/build/bin/llama-quantize",
|
| 81 |
"./model-f16.gguf",
|
| 82 |
f"./model-{QUANTIZATION}.gguf",
|
| 83 |
QUANTIZATION
|