# zindango-slm / scripts/convert_and_push_gguf.py
# Uploaded by ksjpswaroop — commit 1fa99db (verified): "Add scripts/convert_and_push_gguf.py"
#!/usr/bin/env python3
"""
Convert zindango-slm to GGUF and push to Hugging Face.
Requires: llama.cpp cloned, gguf, sentencepiece
pip install gguf sentencepiece
git clone https://github.com/ggml-org/llama.cpp
Usage:
python scripts/convert_and_push_gguf.py [--model-dir PATH] [--quantize Q4_K_M]
"""
import argparse
import subprocess
import sys
from pathlib import Path
from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
def main():
    """CLI entry point: parse arguments, convert the model to GGUF, push to the Hub.

    Orchestrates two phases (each skippable via flags):
      1. Conversion: llama.cpp's convert_hf_to_gguf.py -> f16 GGUF, plus
         optional quantization with llama-quantize (skipped with --push-only).
      2. Upload: create the Hub repo (unless --skip-create) and upload every
         GGUF found in outputs/.
    """
    parser = argparse.ArgumentParser(
        description="Convert zindango-slm to GGUF and push to Hugging Face."
    )
    parser.add_argument(
        "--model-dir",
        default="outputs/zindango-slm-20260215_124754",
        help="Trained HF-format model directory, relative to the project root.",
    )
    parser.add_argument(
        "--llama-cpp",
        default="/home/piren/projects/llama.cpp",
        help="Path to a llama.cpp checkout (provides convert_hf_to_gguf.py).",
    )
    parser.add_argument(
        "--quantize",
        choices=["q4_k_m", "q5_k_m", "q8_0", "none"],
        default="none",
        help="Optional quantization type applied after the f16 conversion.",
    )
    parser.add_argument(
        "--repo-id",
        default=None,
        help="Target Hub repo id; defaults to <username>/zindango-slm.",
    )
    parser.add_argument(
        "--skip-create",
        action="store_true",
        help="Do not attempt to create the Hub repo (use if already created).",
    )
    parser.add_argument(
        "--push-only",
        action="store_true",
        help="Skip conversion, only push existing GGUF",
    )
    args = parser.parse_args()

    # Resolve paths relative to the repo root (this script lives in scripts/).
    project_root = Path(__file__).resolve().parent.parent
    model_dir = project_root / args.model_dir
    out_dir = project_root / "outputs"
    f16_gguf = out_dir / "zindango-slm-f16.gguf"

    if not args.push_only:
        _convert(Path(args.llama_cpp), model_dir, f16_gguf, out_dir, args.quantize)

    _push(out_dir, f16_gguf, args.repo_id, args.skip_create)


def _convert(llama_cpp, model_dir, f16_gguf, out_dir, quantize):
    """Convert the HF model at *model_dir* to f16 GGUF; optionally quantize.

    Raises SystemExit if llama.cpp or the model directory is missing;
    subprocess.CalledProcessError if a conversion/quantization step fails.
    """
    if not (llama_cpp / "convert_hf_to_gguf.py").exists():
        raise SystemExit(f"llama.cpp not found at {llama_cpp}. Clone it first.")
    if not model_dir.exists():
        raise SystemExit(f"Model not found: {model_dir}")

    # Convert to F16 GGUF using llama.cpp's converter.
    cmd = [
        sys.executable,
        str(llama_cpp / "convert_hf_to_gguf.py"),
        str(model_dir),
        "--outtype", "f16",
        "--outfile", str(f16_gguf),
    ]
    print("Converting to GGUF f16...")
    subprocess.run(cmd, check=True)

    if quantize == "none":
        return

    # llama-quantize lives under build/bin in CMake builds; fall back to bin/.
    quant_bin = llama_cpp / "build" / "bin" / "llama-quantize"
    if not quant_bin.exists():
        quant_bin = llama_cpp / "bin" / "llama-quantize"
    if quant_bin.exists():
        q_gguf = out_dir / f"zindango-slm-{quantize}.gguf"
        # llama-quantize expects the uppercase type name (e.g. Q4_K_M).
        cmd = [str(quant_bin), str(f16_gguf), str(q_gguf), quantize.upper()]
        print(f"Quantizing to {quantize}...")
        subprocess.run(cmd, check=True)
    else:
        # Best-effort: a missing quantizer is not fatal, the f16 file still uploads.
        print("llama-quantize not found; skipping quantization")


def _push(out_dir, f16_gguf, repo_id_arg, skip_create):
    """Create the Hub repo if needed and upload every GGUF found in *out_dir*."""
    api = HfApi()
    username = api.whoami()["name"]
    repo_id = repo_id_arg or f"{username}/zindango-slm"

    if not skip_create:
        try:
            create_repo(repo_id, repo_type="model", exist_ok=True)
        except Exception as e:
            # Print a hint for permission errors, then re-raise in every case —
            # NOTE(review): reconstructed from a whitespace-mangled paste; the
            # original may have re-raised only on 403, but swallowing other
            # errors would hide real failures.
            if "403" in str(e).lower() or "forbidden" in str(e).lower():
                print("Create repo failed. Run with --skip-create after creating manually.")
            raise

    # Collect quantized GGUFs under both case variants; the dict dedupes paths
    # (on case-insensitive filesystems both globs can return the same file,
    # which would otherwise be uploaded twice) while preserving order.
    quant_ggufs = {
        p: None
        for pattern in ("zindango-slm-q*.gguf", "zindango-slm-Q*.gguf")
        for p in out_dir.glob(pattern)
    }
    for gguf_path in [f16_gguf, *quant_ggufs]:
        if gguf_path.exists():
            print(f"Uploading {gguf_path.name}...")
            upload_file(
                path_or_fileobj=str(gguf_path),
                path_in_repo=gguf_path.name,
                repo_id=repo_id,
                repo_type="model",
                commit_message=f"Add {gguf_path.name}",
            )
    print(f"Done. Model: https://huggingface.co/{repo_id}")
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()