Upload convert_to_gguf.py with huggingface_hub
Browse files · convert_to_gguf.py (+9, −5)
convert_to_gguf.py
CHANGED
|
@@ -51,21 +51,25 @@ print(f"✅ Merged model saved to {merged_dir}")
|
|
| 51 |
# Step 4: Install llama.cpp for conversion
|
| 52 |
print("\n📥 Installing llama.cpp for GGUF conversion...")
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
-
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
|
| 57 |
# Get number of processors
|
| 58 |
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
|
| 59 |
nproc = nproc_result.stdout.strip()
|
| 60 |
-
print(f"Building llama.cpp with {nproc} cores...")
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Step 5: Convert to GGUF format
|
| 64 |
print("\n🔄 Converting to GGUF format...")
|
| 65 |
subprocess.run([
|
| 66 |
"python3", "llama.cpp/convert_hf_to_gguf.py",
|
| 67 |
merged_dir,
|
| 68 |
-
"--outfile",
|
| 69 |
"--outtype", "f16"
|
| 70 |
], check=True)
|
| 71 |
print("✅ Converted to FP16 GGUF")
|
|
@@ -73,7 +77,7 @@ print("✅ Converted to FP16 GGUF")
|
|
| 73 |
# Step 6: Quantize to specified format
|
| 74 |
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
|
| 75 |
subprocess.run([
|
| 76 |
-
"./llama.cpp/llama-quantize",
|
| 77 |
"./model-f16.gguf",
|
| 78 |
f"./model-{QUANTIZATION}.gguf",
|
| 79 |
QUANTIZATION
|
|
|
|
# Step 4: Install llama.cpp for conversion
print("\n📥 Installing llama.cpp for GGUF conversion...")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential", "cmake"], check=True)
# Shallow clone: full history is not needed to build. Guard against re-runs —
# `git clone` exits non-zero if the target directory already exists, which
# would make check=True raise on the second execution of this script.
if not os.path.isdir("llama.cpp"):
    subprocess.run(
        ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp.git"],
        check=True,
    )

# Get number of processors so the build can run fully parallel.
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
nproc = nproc_result.stdout.strip()
print(f"Building llama.cpp with {nproc} cores using CMake...")

# Use CMake to build. -DGGML_CUDA=ON enables the CUDA backend — assumes a
# CUDA toolkit is installed on this machine; TODO confirm, otherwise the
# configure step will fail and the flag should be dropped.
os.makedirs("llama.cpp/build", exist_ok=True)
subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp", "-DGGML_CUDA=ON"], check=True)
subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j", nproc], check=True)
|
# Step 5: Convert to GGUF format
print("\n🔄 Converting to GGUF format...")
# Build the converter command explicitly, then run it: the merged HF
# checkpoint in `merged_dir` is written out as an FP16 GGUF file.
convert_cmd = [
    "python3",
    "llama.cpp/convert_hf_to_gguf.py",
    merged_dir,
    "--outfile", "./model-f16.gguf",
    "--outtype", "f16",
]
subprocess.run(convert_cmd, check=True)
print("✅ Converted to FP16 GGUF")
|
|
|
|
| 77 |
# Step 6: Quantize to specified format
|
| 78 |
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
|
| 79 |
subprocess.run([
|
| 80 |
+
"./llama.cpp/build/bin/llama-quantize",
|
| 81 |
"./model-f16.gguf",
|
| 82 |
f"./model-{QUANTIZATION}.gguf",
|
| 83 |
QUANTIZATION
|