evalstate HF Staff committed on
Commit
fd76a62
·
verified ·
1 Parent(s): 6a4838b

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +20 -6
convert_to_gguf.py CHANGED
@@ -77,7 +77,7 @@ subprocess.run(
77
  capture_output=True
78
  )
79
  subprocess.run(
80
- ["apt-get", "install", "-y", "-qq", "build-essential"],
81
  check=True,
82
  capture_output=True
83
  )
@@ -138,13 +138,25 @@ print(f" ✅ FP16 GGUF created: {gguf_file}")
138
 
139
  # Step 5: Quantize to different formats
140
  print("\n⚙️ Step 5: Creating quantized versions...")
141
- quantize_bin = "/tmp/llama.cpp/llama-quantize"
142
 
143
- # Build quantize tool first
144
- print(" Building quantize tool...")
145
  try:
146
- result = subprocess.run(
147
- ["make", "-C", "/tmp/llama.cpp", "llama-quantize"],
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  check=True,
149
  capture_output=True,
150
  text=True
@@ -156,6 +168,8 @@ except subprocess.CalledProcessError as e:
156
  print("STDERR:", e.stderr)
157
  raise
158
 
 
 
159
  # Common quantization formats
160
  quant_formats = [
161
  ("Q4_K_M", "4-bit, medium quality (recommended)"),
 
77
  capture_output=True
78
  )
79
  subprocess.run(
80
+ ["apt-get", "install", "-y", "-qq", "build-essential", "cmake"],
81
  check=True,
82
  capture_output=True
83
  )
 
138
 
139
  # Step 5: Quantize to different formats
140
  print("\n⚙️ Step 5: Creating quantized versions...")
 
141
 
142
+ # Build quantize tool first (using CMake)
143
+ print(" Building quantize tool with CMake...")
144
  try:
145
+ # Create build directory
146
+ os.makedirs("/tmp/llama.cpp/build", exist_ok=True)
147
+
148
+ # Configure with CMake
149
+ subprocess.run(
150
+ ["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp",
151
+ "-DGGML_CUDA=OFF"], # Disable CUDA for faster build
152
+ check=True,
153
+ capture_output=True,
154
+ text=True
155
+ )
156
+
157
+ # Build just the quantize tool
158
+ subprocess.run(
159
+ ["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"],
160
  check=True,
161
  capture_output=True,
162
  text=True
 
168
  print("STDERR:", e.stderr)
169
  raise
170
 
171
+ quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"
172
+
173
  # Common quantization formats
174
  quant_formats = [
175
  ("Q4_K_M", "4-bit, medium quality (recommended)"),