blitzkode / scripts /push_all_to_hub.py
neuralbroker's picture
Add scripts/push_all_to_hub.py
f40c7ba verified
raw
history blame
5.39 kB
#!/usr/bin/env python3
"""Push all BlitzKode artifacts to HuggingFace Hub in one command.
Uploads
-------
1. LoRA adapter (1.5B) → neuralbroker/blitzkode-1.5b-lora
2. LoRA adapter (0.5B) → neuralbroker/blitzkode-lora-0.5b
3. GGUF model file → neuralbroker/blitzkode (uploaded as blitzkode.gguf at the repo root)
Usage
-----
# Export HF_TOKEN first, then run:
python scripts/push_all_to_hub.py
# Or pass token directly:
python scripts/push_all_to_hub.py --token hf_XXXX
# Dry-run to validate without pushing:
python scripts/push_all_to_hub.py --dry-run
"""
from __future__ import annotations
import argparse
import os
import subprocess
import sys
from pathlib import Path
# Repository root: the directory two levels above this file.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Per-artifact uploader that main() runs in a subprocess for each adapter.
PUSH_SCRIPT = REPO_ROOT.joinpath("scripts", "push_to_hub.py")

# LoRA adapters to publish. Each entry carries the display label, the local
# checkpoint directory, the destination Hub repo and the commit message.
ARTIFACTS = [
    dict(
        label="1.5B LoRA adapter (primary)",
        checkpoint=REPO_ROOT.joinpath("checkpoints", "blitzkode-1.5b-lora", "final"),
        repo_id="neuralbroker/blitzkode-1.5b-lora",
        commit_message="Upload BlitzKode 1.5B LoRA adapter v2.1 (100-step SFT)",
    ),
    dict(
        label="0.5B LoRA adapter (lightweight)",
        checkpoint=REPO_ROOT.joinpath("checkpoints", "available-lora-0.5b-full", "final"),
        repo_id="neuralbroker/blitzkode-lora-0.5b",
        commit_message="Upload BlitzKode 0.5B LoRA adapter v2.1 (50-step SFT)",
    ),
]
# Destination repo and in-repo filename for the merged GGUF model.
_GGUF_REPO_ID = "neuralbroker/blitzkode"
_GGUF_FILENAME = "blitzkode.gguf"


def push_gguf(token: str, gguf_path: Path, dry_run: bool) -> bool:
    """Upload the GGUF model file to the ``neuralbroker/blitzkode`` repo.

    Args:
        token: HuggingFace write token handed to ``HfApi``. Not used when
            the file is missing or ``dry_run`` is true.
        gguf_path: Local path of the GGUF file to upload.
        dry_run: When true, only report the file size; the Hub is not
            contacted.

    Returns:
        ``False`` if an upload was attempted and failed with
        ``HfHubHTTPError``; ``True`` otherwise (uploaded, skipped because
        the file does not exist, or dry run). Callers that ignore the
        return value behave exactly as before.
    """
    if not gguf_path.exists():
        print(f" [SKIP] GGUF not found: {gguf_path}")
        return True

    size_gb = gguf_path.stat().st_size / 1024 ** 3
    print(f"\n Uploading GGUF ({size_gb:.2f} GB) → {_GGUF_REPO_ID} ...")
    if dry_run:
        print(" [DRY RUN] skipped.")
        return True

    # Imported lazily so that dry runs and skipped uploads do not require
    # huggingface_hub to be installed.
    from huggingface_hub import HfApi  # noqa: PLC0415
    from huggingface_hub.utils import HfHubHTTPError  # noqa: PLC0415

    api = HfApi(token=token)
    try:
        api.create_repo(_GGUF_REPO_ID, repo_type="model", exist_ok=True)
        api.upload_file(
            path_or_fileobj=str(gguf_path),
            path_in_repo=_GGUF_FILENAME,
            repo_id=_GGUF_REPO_ID,
            repo_type="model",
            commit_message="Update GGUF model Q8_0 (1.5B merged + quantised)",
        )
    except HfHubHTTPError as exc:
        print(f" [ERROR] GGUF upload failed: {exc}", file=sys.stderr)
        # Report the failure instead of swallowing it, so the caller can
        # reflect it in the summary and the process exit code.
        return False

    print(f" [OK] GGUF uploaded → https://huggingface.co/{_GGUF_REPO_ID}")
    return True


def main() -> None:
    """Parse CLI arguments and push every BlitzKode artifact to the Hub.

    Each LoRA adapter listed in ``ARTIFACTS`` is pushed by running
    ``PUSH_SCRIPT`` in a subprocess; the GGUF file is uploaded in-process
    through ``push_gguf``. Checkpoints (and a GGUF file) that do not exist
    locally are skipped rather than counted as failures.

    Exits with status 1 when no token is available for a non-dry run, or
    when any push (adapter or GGUF) fails.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--token",
        default=os.environ.get("HF_TOKEN", ""),
        help="HuggingFace write token (or set HF_TOKEN env var).",
    )
    parser.add_argument("--dry-run", action="store_true", help="Validate only, do not push.")
    args = parser.parse_args()

    token = args.token.strip()
    if not token and not args.dry_run:
        print(
            "\n[ERROR] HuggingFace token required.\n"
            " Option 1: export HF_TOKEN=hf_XXXX\n"
            " Option 2: python scripts/push_all_to_hub.py --token hf_XXXX\n"
            " Option 3: run --dry-run to validate without pushing\n"
            "\nGet a write token at: https://huggingface.co/settings/tokens",
            file=sys.stderr,
        )
        sys.exit(1)

    print("=" * 72)
    print("BLITZKODE — PUSH ALL ARTIFACTS TO HUGGING FACE HUB")
    if args.dry_run:
        print("(DRY RUN — nothing will be pushed)")
    print("=" * 72)

    # Repo ids of every push that did not succeed.
    failures: list[str] = []

    for art in ARTIFACTS:
        print(f"\n{'─' * 60}")
        print(f" Artifact : {art['label']}")
        print(f" Repo : {art['repo_id']}")

        checkpoint: Path = art["checkpoint"]
        if not checkpoint.exists():
            print(f" [SKIP] Checkpoint not found: {checkpoint}")
            continue

        cmd = [
            sys.executable,
            str(PUSH_SCRIPT),
            "--checkpoint",
            str(checkpoint),
            "--repo-id",
            art["repo_id"],
            "--commit-message",
            art["commit_message"],
        ]
        if args.dry_run:
            cmd.append("--dry-run")
        if token:
            # NOTE(review): a token passed on the command line is visible in
            # the process list; consider an environment variable if
            # push_to_hub.py supports one — confirm against that script.
            cmd += ["--token", token]

        # check=False: a non-zero exit is recorded below, not raised.
        result = subprocess.run(cmd, check=False)
        if result.returncode != 0:
            failures.append(art["repo_id"])
            print(f" [FAIL] Push exited with code {result.returncode}", file=sys.stderr)

    # GGUF upload (direct via huggingface_hub)
    print(f"\n{'─' * 60}")
    print(f" Artifact : GGUF model ({_GGUF_REPO_ID})")
    # A non-dry run without a token has already exited above, so the
    # --dry-run flag alone decides whether the upload really happens.
    if not push_gguf(token, REPO_ROOT / _GGUF_FILENAME, dry_run=args.dry_run):
        failures.append(_GGUF_REPO_ID)

    # Summary
    print(f"\n{'=' * 72}")
    if failures:
        print(f"PUSH FINISHED WITH FAILURES: {failures}")
        sys.exit(1)

    print("ALL PUSHES COMPLETE")
    print("\nHuggingFace repos:")
    for art in ARTIFACTS:
        print(f" https://huggingface.co/{art['repo_id']}")
    print(f" https://huggingface.co/{_GGUF_REPO_ID} (GGUF)")


if __name__ == "__main__":
    main()