Use pre-built llama.cpp binaries instead of cmake build
Browse files- convert_to_gguf.py +46 -20
convert_to_gguf.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
| 11 |
# ]
|
| 12 |
# ///
|
| 13 |
|
| 14 |
-
import subprocess, sys
|
| 15 |
from pathlib import Path
|
| 16 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 17 |
from peft import PeftModel
|
|
@@ -48,32 +48,58 @@ subprocess.run([sys.executable, "llama.cpp/convert_hf_to_gguf.py",
|
|
| 48 |
str(MERGED_DIR)], check=True)
|
| 49 |
print("✅ F16 GGUF created\n")
|
| 50 |
|
| 51 |
-
# ββ Step 4:
|
| 52 |
-
print("
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
"
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
# ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
|
| 70 |
print("🤗 Creating repo and uploading...")
|
| 71 |
api = HfApi()
|
| 72 |
api.create_repo(GGUF_REPO, exist_ok=True, private=False)
|
| 73 |
api.upload_file(
|
| 74 |
-
path_or_fileobj=
|
| 75 |
-
path_in_repo=
|
| 76 |
repo_id=GGUF_REPO,
|
| 77 |
-
commit_message=
|
| 78 |
)
|
| 79 |
print(f"\n✅ Done → https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")
|
|
|
|
| 11 |
# ]
|
| 12 |
# ///
|
| 13 |
|
| 14 |
+
import subprocess, sys, urllib.request, json, zipfile, stat
|
| 15 |
from pathlib import Path
|
| 16 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 17 |
from peft import PeftModel
|
|
|
|
| 48 |
str(MERGED_DIR)], check=True)
|
| 49 |
print("✅ F16 GGUF created\n")
|
| 50 |
|
| 51 |
# ── Step 4: Download pre-built llama-quantize ─────────────────────────────────
# Ask the GitHub API for the latest llama.cpp release so we can grab a
# pre-built binary instead of building the tools from source with cmake.
print("📦 Fetching latest llama.cpp release...")
req = urllib.request.Request(
    "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest",
    # GitHub's REST API rejects requests that carry no User-Agent header.
    headers={"User-Agent": "Python/3", "Accept": "application/vnd.github.v3+json"}
)
# A timeout keeps the script from hanging forever on a stalled connection.
with urllib.request.urlopen(req, timeout=30) as r:
    release = json.load(r)
print(f"Latest llama.cpp: {release['tag_name']}")
# Pick the first release asset that looks like a Linux x64 binary bundle.
binary_url = None
_asset = next(
    (a for a in release['assets']
     if 'ubuntu' in a['name'].lower()
     and 'x64' in a['name'].lower()
     and a['name'].lower().endswith('.zip')),
    None,
)
if _asset is not None:
    binary_url = _asset['browser_download_url']
    print(f"Downloading: {_asset['name']}")
def _find_quantize(root):
    """Return the first quantize executable found under *root*, or None.

    Newer llama.cpp releases ship the tool as ``llama-quantize``; older
    releases used plain ``quantize`` — try both, newest naming first.
    Only regular files are accepted: a bare ``rglob`` hit could also be a
    directory with a matching name, which would break ``subprocess.run``.
    """
    for pattern in ("llama-quantize", "quantize"):
        hit = next((p for p in Path(root).rglob(pattern) if p.is_file()), None)
        if hit is not None:
            return hit
    return None

quantize_bin = None
if binary_url:
    urllib.request.urlretrieve(binary_url, "llama-bin.zip")
    with zipfile.ZipFile("llama-bin.zip") as zf:
        zf.extractall("llama-bin")
    # ZipFile.extractall does not restore Unix permission bits, so mark
    # every extracted file executable before attempting to run the tool.
    for f in Path("llama-bin").rglob("*"):
        if f.is_file():
            f.chmod(f.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
    quantize_bin = _find_quantize("llama-bin")
# Quantize to Q4_K_M when the binary was found; otherwise fall back to
# uploading the (much larger) F16 GGUF so the run still produces a model.
if quantize_bin:
    print(f"🗜️ Quantizing to Q4_K_M with {quantize_bin}...")
    subprocess.run([str(quantize_bin), "model-f16.gguf",
                    "model-q4_k_m.gguf", "Q4_K_M"], check=True)
    out_file = "model-q4_k_m.gguf"
    out_name = "wp-plugin-recommender-q4_k_m.gguf"
    msg = "Add Q4_K_M GGUF (merged Qwen2.5-0.5B + LoRA)"
    print("✅ Q4_K_M GGUF created\n")
else:
    print("⚠️ llama-quantize not found in release, uploading F16 GGUF instead...")
    out_file = "model-f16.gguf"
    out_name = "wp-plugin-recommender-f16.gguf"
    msg = "Add F16 GGUF (merged Qwen2.5-0.5B + LoRA)"
# ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
# Create the target repo (idempotent) and push whichever GGUF was produced.
print("🤗 Creating repo and uploading...")
api = HfApi()
api.create_repo(GGUF_REPO, exist_ok=True, private=False)
api.upload_file(
    path_or_fileobj=out_file,
    path_in_repo=out_name,
    repo_id=GGUF_REPO,
    commit_message=msg,
)
# Plain string: the original used an f-string with no placeholders (F541).
print("\n✅ Done → https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")