File size: 3,631 Bytes

1fa99db

#!/usr/bin/env python3
"""
Convert zindango-slm to GGUF and push to Hugging Face.

Requires: llama.cpp cloned, plus the gguf, sentencepiece and huggingface_hub packages
  pip install gguf sentencepiece huggingface_hub
  git clone https://github.com/ggml-org/llama.cpp

Usage:
  python scripts/convert_and_push_gguf.py [--model-dir PATH] [--quantize q4_k_m]
"""

import argparse
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, upload_folder, upload_file


def _find_quantize_binary(llama_cpp):
    """Return the ``llama-quantize`` binary under *llama_cpp*, or ``None``.

    ``build/bin`` is checked first, then ``bin``.
    """
    candidates = (
        llama_cpp / "build" / "bin" / "llama-quantize",
        llama_cpp / "bin" / "llama-quantize",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


def _collect_gguf_files(out_dir, f16_gguf):
    """Return the existing GGUF files to upload: f16 first, then quantized."""
    # One character-class pattern instead of separate ``q*`` and ``Q*`` globs:
    # where glob matching is case-insensitive the two globs would return the
    # same file twice and it would be uploaded twice.
    quantized = sorted(out_dir.glob("zindango-slm-[qQ]*.gguf"))
    return [path for path in [f16_gguf, *quantized] if path.exists()]


def main():
    """Convert the model to GGUF (optionally quantize) and upload it to the Hub.

    Command-line options:
      --model-dir    Model directory, resolved against the project root.
      --llama-cpp    Path to a llama.cpp checkout.
      --quantize     q4_k_m, q5_k_m, q8_0 or none (case-insensitive).
      --repo-id      Target repo; defaults to "<username>/zindango-slm".
      --skip-create  Do not try to create the repo before uploading.
      --push-only    Skip conversion and upload the GGUF files already present.

    Raises:
      SystemExit: if llama.cpp or the model directory is missing, or if there
        is no GGUF file to upload.
      subprocess.CalledProcessError: if conversion or quantization fails.
      Exception: any error raised while creating the repo is propagated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", default="outputs/zindango-slm-20260215_124754")
    parser.add_argument("--llama-cpp", default="/home/piren/projects/llama.cpp")
    # type=str.lower lets the documented upper-case spelling (Q4_K_M) pass the
    # lower-case ``choices`` check.
    parser.add_argument(
        "--quantize",
        type=str.lower,
        choices=["q4_k_m", "q5_k_m", "q8_0", "none"],
        default="none",
    )
    parser.add_argument("--repo-id", default=None)
    parser.add_argument("--skip-create", action="store_true")
    parser.add_argument("--push-only", action="store_true", help="Skip conversion, only push existing GGUF")
    args = parser.parse_args()

    project_root = Path(__file__).resolve().parent.parent
    model_dir = project_root / args.model_dir
    out_dir = project_root / "outputs"
    f16_gguf = out_dir / "zindango-slm-f16.gguf"

    if not args.push_only:
        llama_cpp = Path(args.llama_cpp)
        convert_script = llama_cpp / "convert_hf_to_gguf.py"
        if not convert_script.exists():
            raise SystemExit(f"llama.cpp not found at {llama_cpp}. Clone it first.")
        if not model_dir.exists():
            raise SystemExit(f"Model not found: {model_dir}")

        # The converter writes into out_dir, so make sure it exists.
        out_dir.mkdir(parents=True, exist_ok=True)

        # Convert to F16 GGUF
        cmd = [
            sys.executable,
            str(convert_script),
            str(model_dir),
            "--outtype", "f16",
            "--outfile", str(f16_gguf),
        ]
        print("Converting to GGUF f16...")
        subprocess.run(cmd, check=True)

        # Optionally quantize (best effort: skipped when the binary is absent)
        if args.quantize != "none":
            quant_bin = _find_quantize_binary(llama_cpp)
            if quant_bin is not None:
                q_gguf = out_dir / f"zindango-slm-{args.quantize}.gguf"
                cmd = [str(quant_bin), str(f16_gguf), str(q_gguf), args.quantize.upper()]
                print(f"Quantizing to {args.quantize}...")
                subprocess.run(cmd, check=True)
            else:
                print("llama-quantize not found; skipping quantization")

    # Fail before touching the Hub when there is nothing to upload, instead of
    # creating a repo and reporting "Done" with no files pushed.
    gguf_files = _collect_gguf_files(out_dir, f16_gguf)
    if not gguf_files:
        raise SystemExit(f"No GGUF files found in {out_dir}; nothing to upload.")

    # Push to Hub
    api = HfApi()
    user = api.whoami()
    username = user["name"]
    repo_id = args.repo_id or f"{username}/zindango-slm"

    if not args.skip_create:
        try:
            create_repo(repo_id, repo_type="model", exist_ok=True)
        except Exception as e:
            message = str(e).lower()
            if "403" in message or "forbidden" in message:
                print("Create repo failed. Run with --skip-create after creating manually.")
            # Always propagate: previously only the 403 case re-raised and any
            # other failure was silently swallowed.
            raise

    # Upload GGUF file(s): f16 + any quantized (q4_k_m, q8_0, etc.)
    for gguf_path in gguf_files:
        print(f"Uploading {gguf_path.name}...")
        upload_file(
            path_or_fileobj=str(gguf_path),
            path_in_repo=gguf_path.name,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add {gguf_path.name}",
        )

    print(f"Done. Model: https://huggingface.co/{repo_id}")


# Run the convert-and-push workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()