neuralese_temp / scripts /push_cache_to_hf.sh
psidharth567's picture
Add scripts to push cache to HF and code to GitHub
fb55f63
#!/usr/bin/env bash
# Upload everything under cache/ to Hugging Face (separate repo from code).
# Uses Git LFS for weight and checkpoint files. Long-running; run under tmux/screen.
#
# Requires: .env with HF_TOKEN, git-lfs, python3 with the huggingface_hub package
# Usage: bash scripts/push_cache_to_hf.sh
# Abort on the first failing command and on a failure anywhere in a pipeline.
set -e
set -o pipefail

# Resolve the repository root from this script's own location (its parent dir).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
CACHE_DIR="${REPO_ROOT}/cache"

# Destination repo on the Hub; a non-empty HF_CACHE_REPO_ID from the caller wins.
: "${HF_CACHE_REPO_ID:=psidharth567/neuralese_cache}"
export HF_CACHE_REPO_ID

cd "${REPO_ROOT}"

# Auto-export every variable that .env assigns so child processes can read them.
set -o allexport
# shellcheck disable=SC1091
. "${REPO_ROOT}/.env"
set +o allexport
# Make sure the destination Hub repository exists before any git work starts.
# The heredoc delimiter is quoted, so the shell expands nothing inside it; the
# Python code reads HF_TOKEN and HF_CACHE_REPO_ID from the environment instead.
python3 << 'PY'
import os

from huggingface_hub import HfApi

token = os.environ.get("HF_TOKEN")
if not token:
    # A non-zero exit here stops the whole script because of `set -e`.
    raise SystemExit("HF_TOKEN missing in .env")

repo_id = os.environ["HF_CACHE_REPO_ID"]
# exist_ok=True: do not raise when the repository already exists.
HfApi(token=token).create_repo(repo_id, exist_ok=True, repo_type="model")
print("HF repo ready:", repo_id)
PY
# Snapshot cache/ into a brand-new git repository and force-push it to the Hub.
cd "${CACHE_DIR}"

# Never reuse an existing repository: this script always builds a single-commit
# history from scratch and force-pushes it.
if [[ -d .git ]]; then
  echo "cache/ already has a .git repo. Remove it first if you want a fresh push: rm -rf cache/.git" >&2
  exit 1
fi

# Verify prerequisites BEFORE `git init`. Failing afterwards would leave a
# half-built cache/.git behind, and the guard above would then block the re-run.
: "${HF_TOKEN:?HF_TOKEN missing in .env}"
export HF_TOKEN # the credential helper below reads it from the environment
if ! git lfs version > /dev/null 2>&1; then
  echo "git-lfs is required but was not found (see https://git-lfs.com)" >&2
  exit 1
fi

git init -b main
git lfs install

# Weight/checkpoint formats that must go through Git LFS. The last pattern
# covers the HF hub cache, which stores extensionless (multi-GB) blobs.
lfs_patterns=(
  "*.safetensors"
  "*.bin"
  "*.pt"
  "*.pth"
  "*.ckpt"
  "*.safetensors.index"
  "*.wandb"
  "**/blobs/**"
)
for pattern in "${lfs_patterns[@]}"; do
  git lfs track "${pattern}"
done

git config user.email "${GIT_AUTHOR_EMAIL:-neuralese@users.noreply.huggingface.co}"
git config user.name "${GIT_AUTHOR_NAME:-neuralese-cache}"

git add .
git commit -m "Neuralese cache snapshot ($(date -u +%Y-%m-%dT%H:%MZ))"

# Keep the token out of the remote URL: a URL with embedded credentials is
# stored in plaintext in cache/.git/config and shows up in process listings.
git remote add origin "https://huggingface.co/${HF_CACHE_REPO_ID}"

# One-shot credential helper. Single quotes are intentional: ${HF_TOKEN} is
# expanded by the helper's own shell from the environment, never placed on argv.
# It answers only the "get" operation and ignores "store"/"erase".
# shellcheck disable=SC2016
token_helper='!f() { if [ "$1" = get ]; then printf "username=oauth2\npassword=%s\n" "${HF_TOKEN}"; fi; }; f'

# The empty credential.helper entry clears any configured helpers so the token
# is not cached elsewhere. GIT_TERMINAL_PROMPT=0 makes a rejected token fail
# fast instead of hanging on an interactive prompt in an unattended session.
GIT_TERMINAL_PROMPT=0 git \
  -c credential.helper= \
  -c "credential.helper=${token_helper}" \
  push -u origin main --force

echo "Done pushing ${CACHE_DIR} -> https://huggingface.co/${HF_CACHE_REPO_ID}"