nathens committed on
Commit
f3c2b7f
·
verified ·
1 Parent(s): 7d112ab

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +9 -5
convert_to_gguf.py CHANGED
@@ -51,21 +51,25 @@ print(f"✅ Merged model saved to {merged_dir}")
51
  # Step 4: Install llama.cpp for conversion
52
  print("\n📥 Installing llama.cpp for GGUF conversion...")
53
  subprocess.run(["apt-get", "update", "-qq"], check=True)
54
- subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
55
  subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
56
 
57
  # Get number of processors
58
  nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
59
  nproc = nproc_result.stdout.strip()
60
- print(f"Building llama.cpp with {nproc} cores...")
61
- subprocess.run(["make", "-C", "llama.cpp", "-j", nproc], check=True)
 
 
 
 
62
 
63
  # Step 5: Convert to GGUF format
64
  print("\n🔄 Converting to GGUF format...")
65
  subprocess.run([
66
  "python3", "llama.cpp/convert_hf_to_gguf.py",
67
  merged_dir,
68
- "--outfile", f"./model-f16.gguf",
69
  "--outtype", "f16"
70
  ], check=True)
71
  print("✅ Converted to FP16 GGUF")
@@ -73,7 +77,7 @@ print("✅ Converted to FP16 GGUF")
73
  # Step 6: Quantize to specified format
74
  print(f"\n⚡ Quantizing to {QUANTIZATION}...")
75
  subprocess.run([
76
- "./llama.cpp/llama-quantize",
77
  "./model-f16.gguf",
78
  f"./model-{QUANTIZATION}.gguf",
79
  QUANTIZATION
 
51
  # Step 4: Install llama.cpp for conversion
52
  print("\n📥 Installing llama.cpp for GGUF conversion...")
53
  subprocess.run(["apt-get", "update", "-qq"], check=True)
54
+ subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential", "cmake"], check=True)
55
  subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
56
 
57
  # Get number of processors
58
  nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
59
  nproc = nproc_result.stdout.strip()
60
+ print(f"Building llama.cpp with {nproc} cores using CMake...")
61
+
62
+ # Use CMake to build
63
+ os.makedirs("llama.cpp/build", exist_ok=True)
64
+ subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp", "-DGGML_CUDA=ON"], check=True)
65
+ subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j", nproc], check=True)
66
 
67
  # Step 5: Convert to GGUF format
68
  print("\n🔄 Converting to GGUF format...")
69
  subprocess.run([
70
  "python3", "llama.cpp/convert_hf_to_gguf.py",
71
  merged_dir,
72
+ "--outfile", "./model-f16.gguf",
73
  "--outtype", "f16"
74
  ], check=True)
75
  print("✅ Converted to FP16 GGUF")
 
77
  # Step 6: Quantize to specified format
78
  print(f"\n⚡ Quantizing to {QUANTIZATION}...")
79
  subprocess.run([
80
+ "./llama.cpp/build/bin/llama-quantize",
81
  "./model-f16.gguf",
82
  f"./model-{QUANTIZATION}.gguf",
83
  QUANTIZATION