mattPearce committed on
Commit
d5318fa
·
verified ·
1 Parent(s): dad1267

Use pre-built llama.cpp binaries instead of cmake build

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +46 -20
convert_to_gguf.py CHANGED
@@ -11,7 +11,7 @@
11
  # ]
12
  # ///
13
 
14
- import subprocess, sys
15
  from pathlib import Path
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
  from peft import PeftModel
@@ -48,32 +48,58 @@ subprocess.run([sys.executable, "llama.cpp/convert_hf_to_gguf.py",
48
  str(MERGED_DIR)], check=True)
49
  print("βœ… F16 GGUF created\n")
50
 
51
- # ── Step 4: Build llama-quantize and produce Q4_K_M ───────────────────────────
52
- print("πŸ”¨ Installing cmake + build tools...")
53
- subprocess.run(["apt-get", "install", "-y", "cmake", "build-essential"], check=True)
54
- print("πŸ”¨ Building llama-quantize...")
55
- subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp",
56
- "-DCMAKE_BUILD_TYPE=Release", "-DLLAMA_BUILD_TESTS=OFF",
57
- "-DLLAMA_BUILD_EXAMPLES=OFF"], check=True)
58
- subprocess.run(["cmake", "--build", "llama.cpp/build",
59
- "--target", "llama-quantize", "-j4"], check=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- print("πŸ—œοΈ Quantizing to Q4_K_M...")
62
- quantize_bin = next(Path("llama.cpp/build").rglob("llama-quantize"), None)
63
- if quantize_bin is None:
64
- quantize_bin = next(Path("llama.cpp/build").rglob("quantize"), None)
65
- subprocess.run([str(quantize_bin), "model-f16.gguf",
66
- "model-q4_k_m.gguf", "Q4_K_M"], check=True)
67
- print("βœ… Q4_K_M GGUF created\n")
 
 
 
 
 
 
68
 
69
  # ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
70
  print("πŸ“€ Creating repo and uploading...")
71
  api = HfApi()
72
  api.create_repo(GGUF_REPO, exist_ok=True, private=False)
73
  api.upload_file(
74
- path_or_fileobj="model-q4_k_m.gguf",
75
- path_in_repo="wp-plugin-recommender-q4_k_m.gguf",
76
  repo_id=GGUF_REPO,
77
- commit_message="Add Q4_K_M GGUF (merged Qwen2.5-0.5B + LoRA)"
78
  )
79
  print(f"\nβœ… Done β€” https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")
 
11
  # ]
12
  # ///
13
 
14
+ import subprocess, sys, urllib.request, json, zipfile, stat
15
  from pathlib import Path
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
  from peft import PeftModel
 
48
  str(MERGED_DIR)], check=True)
49
  print("βœ… F16 GGUF created\n")
50
 
51
# ── Step 4: Download pre-built llama-quantize ─────────────────────────────────
# Query the GitHub API for the latest llama.cpp release so we can use a
# pre-built Linux binary instead of compiling from source.
print("📦 Fetching latest llama.cpp release...")
req = urllib.request.Request(
    "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest",
    # GitHub's API rejects requests without a User-Agent header.
    headers={"User-Agent": "Python/3", "Accept": "application/vnd.github.v3+json"}
)
with urllib.request.urlopen(req) as r:
    release = json.load(r)
print(f"Latest llama.cpp: {release['tag_name']}")

# Pick the Ubuntu x64 zip asset, if this release ships one.
binary_url = None
for asset in release['assets']:
    n = asset['name'].lower()
    if 'ubuntu' in n and 'x64' in n and n.endswith('.zip'):
        binary_url = asset['browser_download_url']
        print(f"Downloading: {asset['name']}")
        break

quantize_bin = None
if binary_url:
    urllib.request.urlretrieve(binary_url, "llama-bin.zip")
    try:
        with zipfile.ZipFile("llama-bin.zip") as zf:
            zf.extractall("llama-bin")
    finally:
        # Fix: the downloaded archive was previously left on disk after
        # extraction; delete it so it doesn't waste space in the build env.
        Path("llama-bin.zip").unlink(missing_ok=True)
    # Zip archives don't reliably preserve the executable bit — restore it
    # on every extracted file so the quantize tool can be run.
    for f in Path("llama-bin").rglob("*"):
        if f.is_file():
            f.chmod(f.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
    # Newer releases name the tool "llama-quantize"; older ones "quantize".
    quantize_bin = next(Path("llama-bin").rglob("llama-quantize"), None)
    if quantize_bin is None:
        quantize_bin = next(Path("llama-bin").rglob("quantize"), None)

if quantize_bin:
    print(f"🗜️ Quantizing to Q4_K_M with {quantize_bin}...")
    subprocess.run([str(quantize_bin), "model-f16.gguf",
                    "model-q4_k_m.gguf", "Q4_K_M"], check=True)
    out_file = "model-q4_k_m.gguf"
    out_name = "wp-plugin-recommender-q4_k_m.gguf"
    msg = "Add Q4_K_M GGUF (merged Qwen2.5-0.5B + LoRA)"
    print("✅ Q4_K_M GGUF created\n")
else:
    # Fall back to the (larger) F16 GGUF rather than failing outright.
    print("⚠️ llama-quantize not found in release, uploading F16 GGUF instead...")
    out_file = "model-f16.gguf"
    out_name = "wp-plugin-recommender-f16.gguf"
    msg = "Add F16 GGUF (merged Qwen2.5-0.5B + LoRA)"
94
 
95
  # ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
96
  print("πŸ“€ Creating repo and uploading...")
97
  api = HfApi()
98
  api.create_repo(GGUF_REPO, exist_ok=True, private=False)
99
  api.upload_file(
100
+ path_or_fileobj=out_file,
101
+ path_in_repo=out_name,
102
  repo_id=GGUF_REPO,
103
+ commit_message=msg
104
  )
105
  print(f"\nβœ… Done β€” https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")