#!/usr/bin/env python3
"""Convert zindango-slm to GGUF and push to Hugging Face.

Requires: llama.cpp cloned, gguf, sentencepiece
    pip install gguf sentencepiece
    git clone https://github.com/ggml-org/llama.cpp

Usage:
    python scripts/convert_and_push_gguf.py [--model-dir PATH] [--quantize Q4_K_M]
"""

import argparse
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, upload_file, upload_folder


def _parse_args() -> argparse.Namespace:
    """Parse command-line options for conversion and upload."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", default="outputs/zindango-slm-20260215_124754")
    parser.add_argument("--llama-cpp", default="/home/piren/projects/llama.cpp")
    parser.add_argument(
        "--quantize",
        choices=["q4_k_m", "q5_k_m", "q8_0", "none"],
        default="none",
    )
    parser.add_argument("--repo-id", default=None)
    parser.add_argument("--skip-create", action="store_true")
    parser.add_argument(
        "--push-only",
        action="store_true",
        help="Skip conversion, only push existing GGUF",
    )
    return parser.parse_args()


def _convert_to_f16(llama_cpp: Path, model_dir: Path, f16_gguf: Path) -> None:
    """Run llama.cpp's HF->GGUF converter, writing an f16 GGUF to *f16_gguf*.

    Raises SystemExit when llama.cpp or the model directory is missing, and
    subprocess.CalledProcessError when the converter itself fails.
    """
    if not (llama_cpp / "convert_hf_to_gguf.py").exists():
        raise SystemExit(f"llama.cpp not found at {llama_cpp}. Clone it first.")
    if not model_dir.exists():
        raise SystemExit(f"Model not found: {model_dir}")
    cmd = [
        sys.executable,
        str(llama_cpp / "convert_hf_to_gguf.py"),
        str(model_dir),
        "--outtype", "f16",
        "--outfile", str(f16_gguf),
    ]
    print("Converting to GGUF f16...")
    subprocess.run(cmd, check=True)


def _quantize(llama_cpp: Path, f16_gguf: Path, out_dir: Path, quantize: str) -> None:
    """Quantize *f16_gguf* with llama-quantize, if the binary can be found.

    Looks in the CMake build tree first, then a flat bin/ layout; when neither
    exists, prints a notice and returns without error (best-effort step).
    """
    quant_bin = llama_cpp / "build" / "bin" / "llama-quantize"
    if not quant_bin.exists():
        quant_bin = llama_cpp / "bin" / "llama-quantize"
    if not quant_bin.exists():
        print("llama-quantize not found; skipping quantization")
        return
    q_gguf = out_dir / f"zindango-slm-{quantize}.gguf"
    # llama-quantize expects the type name in upper case (e.g. Q4_K_M).
    cmd = [str(quant_bin), str(f16_gguf), str(q_gguf), quantize.upper()]
    print(f"Quantizing to {quantize}...")
    subprocess.run(cmd, check=True)


def _push(out_dir: Path, f16_gguf: Path, repo_id, skip_create: bool) -> str:
    """Create the Hub repo (unless *skip_create*) and upload every GGUF found.

    *repo_id* may be None, in which case "<username>/zindango-slm" is derived
    from the authenticated user. Returns the resolved repo id.
    """
    api = HfApi()
    user = api.whoami()
    username = user["name"]
    repo_id = repo_id or f"{username}/zindango-slm"

    if not skip_create:
        try:
            create_repo(repo_id, repo_type="model", exist_ok=True)
        except Exception as e:
            # A 403 usually means the token lacks write scope; print the
            # workaround hint before re-raising in every case.
            msg = str(e).lower()
            if "403" in msg or "forbidden" in msg:
                print("Create repo failed. Run with --skip-create after creating manually.")
            raise

    # Gather f16 + any quantized artifacts (q4_k_m, q8_0, ...). Both case
    # variants of the glob are kept, but deduplicated: on case-insensitive
    # filesystems the two patterns match the same file twice, which would
    # otherwise upload (and commit) each artifact twice.
    candidates = (
        [f16_gguf]
        + sorted(out_dir.glob("zindango-slm-q*.gguf"))
        + sorted(out_dir.glob("zindango-slm-Q*.gguf"))
    )
    seen = dict.fromkeys(p.resolve() for p in candidates if p.exists())
    ggufs = list(seen)
    if not ggufs:
        # Previously a silent no-op (e.g. --push-only with nothing built).
        print("Warning: no GGUF files found to upload.")

    for gguf_path in ggufs:
        print(f"Uploading {gguf_path.name}...")
        upload_file(
            path_or_fileobj=str(gguf_path),
            path_in_repo=gguf_path.name,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add {gguf_path.name}",
        )
    return repo_id


def main() -> None:
    """Convert the model to GGUF (optionally quantized) and push to the Hub."""
    args = _parse_args()
    project_root = Path(__file__).resolve().parent.parent
    model_dir = project_root / args.model_dir
    out_dir = project_root / "outputs"
    f16_gguf = out_dir / "zindango-slm-f16.gguf"

    if not args.push_only:
        llama_cpp = Path(args.llama_cpp)
        _convert_to_f16(llama_cpp, model_dir, f16_gguf)
        if args.quantize != "none":
            _quantize(llama_cpp, f16_gguf, out_dir, args.quantize)

    repo_id = _push(out_dir, f16_gguf, args.repo_id, args.skip_create)
    print(f"Done. Model: https://huggingface.co/{repo_id}")


if __name__ == "__main__":
    main()