"""
Convert zindango-slm to GGUF and push to Hugging Face.

Requires: llama.cpp cloned, plus the gguf, sentencepiece and huggingface_hub packages
    pip install gguf sentencepiece huggingface_hub
    git clone https://github.com/ggml-org/llama.cpp

Usage:
    python scripts/convert_and_push_gguf.py [--model-dir PATH] [--quantize q4_k_m]
"""
| |
|
| | import argparse |
| | import subprocess |
| | import sys |
| | from pathlib import Path |
| |
|
| | from huggingface_hub import HfApi, create_repo, upload_folder, upload_file |
| |
|
| |
|
def _parse_args() -> argparse.Namespace:
    """Parse command-line options. Defaults point at the original training run."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", default="outputs/zindango-slm-20260215_124754")
    parser.add_argument("--llama-cpp", default="/home/piren/projects/llama.cpp")
    parser.add_argument("--quantize", choices=["q4_k_m", "q5_k_m", "q8_0", "none"], default="none")
    parser.add_argument("--repo-id", default=None)
    parser.add_argument("--skip-create", action="store_true")
    parser.add_argument("--push-only", action="store_true", help="Skip conversion, only push existing GGUF")
    return parser.parse_args()


def _convert_to_gguf(llama_cpp: Path, model_dir: Path, f16_gguf: Path) -> None:
    """Run llama.cpp's convert_hf_to_gguf.py to produce an f16 GGUF at *f16_gguf*.

    Raises CalledProcessError if the converter exits non-zero.
    """
    cmd = [
        sys.executable,
        str(llama_cpp / "convert_hf_to_gguf.py"),
        str(model_dir),
        "--outtype", "f16",
        "--outfile", str(f16_gguf),
    ]
    print("Converting to GGUF f16...")
    subprocess.run(cmd, check=True)


def _quantize_gguf(llama_cpp: Path, f16_gguf: Path, out_dir: Path, quantize: str) -> None:
    """Quantize *f16_gguf* with llama-quantize into *out_dir*.

    Best-effort: prints a notice and returns if the binary cannot be found.
    """
    quant_bin = llama_cpp / "build" / "bin" / "llama-quantize"
    if not quant_bin.exists():
        # Some build layouts place binaries directly under bin/.
        quant_bin = llama_cpp / "bin" / "llama-quantize"
    if not quant_bin.exists():
        print("llama-quantize not found; skipping quantization")
        return
    q_gguf = out_dir / f"zindango-slm-{quantize}.gguf"
    # llama-quantize expects the uppercase type name (e.g. Q4_K_M).
    cmd = [str(quant_bin), str(f16_gguf), str(q_gguf), quantize.upper()]
    print(f"Quantizing to {quantize}...")
    subprocess.run(cmd, check=True)


def _ensure_repo(repo_id: str) -> None:
    """Create the Hub model repo if needed; always re-raise on failure.

    On a 403/forbidden error, also print a hint about --skip-create first.
    """
    try:
        create_repo(repo_id, repo_type="model", exist_ok=True)
    except Exception as e:
        if "403" in str(e).lower() or "forbidden" in str(e).lower():
            print("Create repo failed. Run with --skip-create after creating manually.")
        raise


def _upload_ggufs(out_dir: Path, f16_gguf: Path, repo_id: str) -> None:
    """Upload the f16 GGUF plus any quantized variants found in *out_dir*.

    Missing files are skipped silently (e.g. f16 absent in --push-only runs).
    """
    # sorted() makes the upload order deterministic (glob order is not).
    quant_ggufs = sorted(out_dir.glob("zindango-slm-q*.gguf")) + sorted(out_dir.glob("zindango-slm-Q*.gguf"))
    for gguf_path in [f16_gguf] + quant_ggufs:
        if not gguf_path.exists():
            continue
        print(f"Uploading {gguf_path.name}...")
        upload_file(
            path_or_fileobj=str(gguf_path),
            path_in_repo=gguf_path.name,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add {gguf_path.name}",
        )


def main():
    """Convert the model to GGUF, optionally quantize, and push to the Hub."""
    args = _parse_args()

    project_root = Path(__file__).resolve().parent.parent
    # Note: pathlib drops project_root when args.model_dir is absolute,
    # so absolute --model-dir paths also work.
    model_dir = project_root / args.model_dir
    out_dir = project_root / "outputs"
    f16_gguf = out_dir / "zindango-slm-f16.gguf"

    if not args.push_only:
        llama_cpp = Path(args.llama_cpp)
        if not (llama_cpp / "convert_hf_to_gguf.py").exists():
            raise SystemExit(f"llama.cpp not found at {llama_cpp}. Clone it first.")
        if not model_dir.exists():
            raise SystemExit(f"Model not found: {model_dir}")
        # Robustness: outputs/ may not exist when --model-dir points elsewhere;
        # the converter would otherwise fail writing --outfile.
        out_dir.mkdir(parents=True, exist_ok=True)

        _convert_to_gguf(llama_cpp, model_dir, f16_gguf)
        if args.quantize != "none":
            _quantize_gguf(llama_cpp, f16_gguf, out_dir, args.quantize)

    api = HfApi()
    username = api.whoami()["name"]
    repo_id = args.repo_id or f"{username}/zindango-slm"

    if not args.skip_create:
        _ensure_repo(repo_id)

    _upload_ggufs(out_dir, f16_gguf, repo_id)
    print(f"Done. Model: https://huggingface.co/{repo_id}")


if __name__ == "__main__":
    main()
| |
|