evalstate
/

trl-demo-scripts

evalstate HF Staff committed on Oct 28, 2025

Commit

fd76a62

verified ·

1 Parent(s): 6a4838b

Upload convert_to_gguf.py with huggingface_hub

Files changed (1) hide show

convert_to_gguf.py CHANGED Viewed

@@ -77,7 +77,7 @@ subprocess.run(
     capture_output=True
 )
 subprocess.run(
-    ["apt-get", "install", "-y", "-qq", "build-essential"],
     check=True,
     capture_output=True
 )
@@ -138,13 +138,25 @@ print(f"   ✅ FP16 GGUF created: {gguf_file}")
 # Step 5: Quantize to different formats
 print("\n⚙️  Step 5: Creating quantized versions...")
-quantize_bin = "/tmp/llama.cpp/llama-quantize"
-# Build quantize tool first
-print("   Building quantize tool...")
 try:
-    result = subprocess.run(
-        ["make", "-C", "/tmp/llama.cpp", "llama-quantize"],
         check=True,
         capture_output=True,
         text=True
@@ -156,6 +168,8 @@ except subprocess.CalledProcessError as e:
     print("STDERR:", e.stderr)
     raise
 # Common quantization formats
 quant_formats = [
     ("Q4_K_M", "4-bit, medium quality (recommended)"),

     capture_output=True
 )
 subprocess.run(
+    ["apt-get", "install", "-y", "-qq", "build-essential", "cmake"],
     check=True,
     capture_output=True
 )
 # Step 5: Quantize to different formats
 print("\n⚙️  Step 5: Creating quantized versions...")
+# Build quantize tool first (using CMake)
+print("   Building quantize tool with CMake...")
 try:
+    # Create build directory
+    os.makedirs("/tmp/llama.cpp/build", exist_ok=True)
+    # Configure with CMake
+    subprocess.run(
+        ["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp",
+         "-DGGML_CUDA=OFF"],  # Disable CUDA for faster build
+        check=True,
+        capture_output=True,
+        text=True
+    )
+    # Build just the quantize tool
+    subprocess.run(
+        ["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"],
         check=True,
         capture_output=True,
         text=True
     print("STDERR:", e.stderr)
     raise
+quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"
 # Common quantization formats
 quant_formats = [
     ("Q4_K_M", "4-bit, medium quality (recommended)"),