Upload convert_to_gguf.py with huggingface_hub
Browse files
- convert_to_gguf.py +20 -6
convert_to_gguf.py
CHANGED
|
@@ -77,7 +77,7 @@ subprocess.run(
|
|
| 77 |
capture_output=True
|
| 78 |
)
|
| 79 |
subprocess.run(
|
| 80 |
-
["apt-get", "install", "-y", "-qq", "build-essential"],
|
| 81 |
check=True,
|
| 82 |
capture_output=True
|
| 83 |
)
|
|
@@ -138,13 +138,25 @@ print(f" ✅ FP16 GGUF created: {gguf_file}")
|
|
| 138 |
|
| 139 |
# Step 5: Quantize to different formats
|
| 140 |
print("\n⚙️ Step 5: Creating quantized versions...")
|
| 141 |
-
quantize_bin = "/tmp/llama.cpp/llama-quantize"
|
| 142 |
|
| 143 |
-
# Build quantize tool first
|
| 144 |
-
print(" Building quantize tool...")
|
| 145 |
try:
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
check=True,
|
| 149 |
capture_output=True,
|
| 150 |
text=True
|
|
@@ -156,6 +168,8 @@ except subprocess.CalledProcessError as e:
|
|
| 156 |
print("STDERR:", e.stderr)
|
| 157 |
raise
|
| 158 |
|
|
|
|
|
|
|
| 159 |
# Common quantization formats
|
| 160 |
quant_formats = [
|
| 161 |
("Q4_K_M", "4-bit, medium quality (recommended)"),
|
|
|
|
| 77 |
capture_output=True
|
| 78 |
)
|
| 79 |
subprocess.run(
|
| 80 |
+
["apt-get", "install", "-y", "-qq", "build-essential", "cmake"],
|
| 81 |
check=True,
|
| 82 |
capture_output=True
|
| 83 |
)
|
|
|
|
| 138 |
|
| 139 |
# Step 5: Quantize to different formats
|
| 140 |
print("\n⚙️ Step 5: Creating quantized versions...")
|
|
|
|
| 141 |
|
| 142 |
+
# Build quantize tool first (using CMake)
|
| 143 |
+
print(" Building quantize tool with CMake...")
|
| 144 |
try:
|
| 145 |
+
# Create build directory
|
| 146 |
+
os.makedirs("/tmp/llama.cpp/build", exist_ok=True)
|
| 147 |
+
|
| 148 |
+
# Configure with CMake
|
| 149 |
+
subprocess.run(
|
| 150 |
+
["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp",
|
| 151 |
+
"-DGGML_CUDA=OFF"], # Disable CUDA for faster build
|
| 152 |
+
check=True,
|
| 153 |
+
capture_output=True,
|
| 154 |
+
text=True
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
# Build just the quantize tool
|
| 158 |
+
subprocess.run(
|
| 159 |
+
["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"],
|
| 160 |
check=True,
|
| 161 |
capture_output=True,
|
| 162 |
text=True
|
|
|
|
| 168 |
print("STDERR:", e.stderr)
|
| 169 |
raise
|
| 170 |
|
| 171 |
+
quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"
|
| 172 |
+
|
| 173 |
# Common quantization formats
|
| 174 |
quant_formats = [
|
| 175 |
("Q4_K_M", "4-bit, medium quality (recommended)"),
|