#!/usr/bin/env bash
# Upload everything under cache/ to Hugging Face (separate repo from code).
# Uses Git LFS for weight and checkpoint files. Long-running; run under tmux/screen.
#
# Requires: .env with HF_TOKEN, git-lfs, python3 with huggingface_hub
# Optional env:
#   HF_CACHE_REPO_ID   target repo id (default: psidharth567/neuralese_cache)
#   GIT_AUTHOR_EMAIL   commit author email for the snapshot commit
#   GIT_AUTHOR_NAME    commit author name for the snapshot commit
# Usage: bash scripts/push_cache_to_hf.sh

set -eo pipefail

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
CACHE_DIR="${REPO_ROOT}/cache"
export HF_CACHE_REPO_ID="${HF_CACHE_REPO_ID:-psidharth567/neuralese_cache}"

# --- Preflight: fail before touching the network or creating the remote repo.
command -v git-lfs >/dev/null 2>&1 || die "git-lfs not found on PATH; install it first"
[[ -f "${REPO_ROOT}/.env" ]] || die "Missing ${REPO_ROOT}/.env (must define HF_TOKEN)"
[[ -d "${CACHE_DIR}" ]] || die "Cache directory not found: ${CACHE_DIR}"
if [[ -d "${CACHE_DIR}/.git" ]]; then
  die "cache/ already has a .git repo. Remove it first if you want a fresh push: rm -rf cache/.git"
fi

cd "${REPO_ROOT}"

# `set -a` auto-exports every variable assigned while sourcing .env so that the
# Python snippet and the git credential helper below can read HF_TOKEN.
set -a
# shellcheck disable=SC1091
source "${REPO_ROOT}/.env"
set +a

: "${HF_TOKEN:?HF_TOKEN missing in .env}"
export HF_TOKEN

# Create the target repo on the Hub if it does not exist yet (idempotent).
python3 << 'PY'
import os
from huggingface_hub import HfApi

token = os.environ.get("HF_TOKEN")
if not token:
    raise SystemExit("HF_TOKEN missing in .env")
repo_id = os.environ["HF_CACHE_REPO_ID"]
HfApi(token=token).create_repo(repo_id, exist_ok=True, repo_type="model")
print("HF repo ready:", repo_id)
PY

cd "${CACHE_DIR}"

git init -b main
git lfs install

# Route weight/checkpoint formats through LFS.
lfs_patterns=(
  "*.safetensors"
  "*.bin"
  "*.pt"
  "*.pth"
  "*.ckpt"
  "*.safetensors.index"
  "*.wandb"
  # HF hub cache stores extensionless blobs (multi-GB); keep them in LFS.
  "**/blobs/**"
)
for pattern in "${lfs_patterns[@]}"; do
  git lfs track "${pattern}"
done
# NOTE(review): the Hub may need extra LFS configuration for single files
# above its per-file size threshold — confirm against the Hub docs if a push
# of very large blobs is rejected.

git config user.email "${GIT_AUTHOR_EMAIL:-neuralese@users.noreply.huggingface.co}"
git config user.name "${GIT_AUTHOR_NAME:-neuralese-cache}"

git add .
git commit -m "Neuralese cache snapshot ($(date -u +%Y-%m-%dT%H:%MZ))"

# The remote URL deliberately carries NO credentials: embedding the token in
# the URL would persist it in cache/.git/config and expose it in argv.
git remote add origin "https://huggingface.co/${HF_CACHE_REPO_ID}"

# Supply the token only for this push via a one-shot credential helper.
# The first (empty) credential.helper resets any configured helpers so the
# token is not stored in a keychain; the second answers "get" requests from
# the exported HF_TOKEN. The helper text is single-quoted, so ${HF_TOKEN} is
# expanded by the helper's own shell, not placed on git's command line.
git -c credential.helper= \
  -c 'credential.helper=!f() { test "$1" = get && printf "username=oauth2\npassword=%s\n" "${HF_TOKEN}"; }; f' \
  push -u origin main --force

echo "Done pushing ${CACHE_DIR} -> https://huggingface.co/${HF_CACHE_REPO_ID}"