mattPearce committed on
Commit
d5318fa
·
verified ·
1 Parent(s): dad1267

Use pre-built llama.cpp binaries instead of cmake build

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +46 -20
convert_to_gguf.py CHANGED
@@ -11,7 +11,7 @@
11
  # ]
12
  # ///
13
 
14
- import subprocess, sys
15
  from pathlib import Path
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
  from peft import PeftModel
@@ -48,32 +48,58 @@ subprocess.run([sys.executable, "llama.cpp/convert_hf_to_gguf.py",
48
  str(MERGED_DIR)], check=True)
49
  print("βœ… F16 GGUF created\n")
50
 
51
- # ── Step 4: Build llama-quantize and produce Q4_K_M ───────────────────────────
52
- print("πŸ”¨ Installing cmake + build tools...")
53
- subprocess.run(["apt-get", "install", "-y", "cmake", "build-essential"], check=True)
54
- print("πŸ”¨ Building llama-quantize...")
55
- subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp",
56
- "-DCMAKE_BUILD_TYPE=Release", "-DLLAMA_BUILD_TESTS=OFF",
57
- "-DLLAMA_BUILD_EXAMPLES=OFF"], check=True)
58
- subprocess.run(["cmake", "--build", "llama.cpp/build",
59
- "--target", "llama-quantize", "-j4"], check=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- print("πŸ—œοΈ Quantizing to Q4_K_M...")
62
- quantize_bin = next(Path("llama.cpp/build").rglob("llama-quantize"), None)
63
- if quantize_bin is None:
64
- quantize_bin = next(Path("llama.cpp/build").rglob("quantize"), None)
65
- subprocess.run([str(quantize_bin), "model-f16.gguf",
66
- "model-q4_k_m.gguf", "Q4_K_M"], check=True)
67
- print("βœ… Q4_K_M GGUF created\n")
 
 
 
 
 
 
68
 
69
  # ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
70
  print("πŸ“€ Creating repo and uploading...")
71
  api = HfApi()
72
  api.create_repo(GGUF_REPO, exist_ok=True, private=False)
73
  api.upload_file(
74
- path_or_fileobj="model-q4_k_m.gguf",
75
- path_in_repo="wp-plugin-recommender-q4_k_m.gguf",
76
  repo_id=GGUF_REPO,
77
- commit_message="Add Q4_K_M GGUF (merged Qwen2.5-0.5B + LoRA)"
78
  )
79
  print(f"\nβœ… Done β€” https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")
 
11
  # ]
12
  # ///
13
 
14
+ import subprocess, sys, urllib.request, json, zipfile, stat
15
  from pathlib import Path
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
  from peft import PeftModel
 
48
  str(MERGED_DIR)], check=True)
49
  print("βœ… F16 GGUF created\n")
50
 
51
# ── Step 4: Download pre-built llama-quantize ─────────────────────────────────
# Query the GitHub API for the latest llama.cpp release so we can use a
# pre-built Linux binary instead of compiling from source.
print("📦 Fetching latest llama.cpp release...")
req = urllib.request.Request(
    "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest",
    # GitHub's API rejects requests without a User-Agent header.
    headers={"User-Agent": "Python/3", "Accept": "application/vnd.github.v3+json"}
)
with urllib.request.urlopen(req) as r:
    release = json.load(r)
print(f"Latest llama.cpp: {release['tag_name']}")

# Pick the Ubuntu x64 zip asset, if this release ships one.
binary_url = None
for asset in release['assets']:
    n = asset['name'].lower()
    if 'ubuntu' in n and 'x64' in n and n.endswith('.zip'):
        binary_url = asset['browser_download_url']
        print(f"Downloading: {asset['name']}")
        break

quantize_bin = None
if binary_url:
    urllib.request.urlretrieve(binary_url, "llama-bin.zip")
    try:
        with zipfile.ZipFile("llama-bin.zip") as zf:
            zf.extractall("llama-bin")
    finally:
        # Fix: the downloaded archive was previously left on disk after
        # extraction; delete it so it doesn't waste space in the build env.
        Path("llama-bin.zip").unlink(missing_ok=True)
    # Zip archives don't reliably preserve the executable bit — restore it
    # on every extracted file so the quantize tool can be run.
    for f in Path("llama-bin").rglob("*"):
        if f.is_file():
            f.chmod(f.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
    # Newer releases name the tool "llama-quantize"; older ones "quantize".
    quantize_bin = next(Path("llama-bin").rglob("llama-quantize"), None)
    if quantize_bin is None:
        quantize_bin = next(Path("llama-bin").rglob("quantize"), None)

if quantize_bin:
    print(f"🗜️ Quantizing to Q4_K_M with {quantize_bin}...")
    subprocess.run([str(quantize_bin), "model-f16.gguf",
                    "model-q4_k_m.gguf", "Q4_K_M"], check=True)
    out_file = "model-q4_k_m.gguf"
    out_name = "wp-plugin-recommender-q4_k_m.gguf"
    msg = "Add Q4_K_M GGUF (merged Qwen2.5-0.5B + LoRA)"
    print("✅ Q4_K_M GGUF created\n")
else:
    # Fall back to the (larger) F16 GGUF rather than failing outright.
    print("⚠️ llama-quantize not found in release, uploading F16 GGUF instead...")
    out_file = "model-f16.gguf"
    out_name = "wp-plugin-recommender-f16.gguf"
    msg = "Add F16 GGUF (merged Qwen2.5-0.5B + LoRA)"
94
 
95
  # ── Step 5: Upload to Hub ──────────────────────────────────────────────────────
96
  print("πŸ“€ Creating repo and uploading...")
97
  api = HfApi()
98
  api.create_repo(GGUF_REPO, exist_ok=True, private=False)
99
  api.upload_file(
100
+ path_or_fileobj=out_file,
101
+ path_in_repo=out_name,
102
  repo_id=GGUF_REPO,
103
+ commit_message=msg
104
  )
105
  print(f"\nβœ… Done β€” https://huggingface.co/mattPearce/wp-plugin-recommender-gguf")