Document-Audit-RAG / scripts /push_hf_space_without_docs.sh
mayankchugh-learning
Add Space README YAML, HF push script, and docs without binary assets
1d1fda9
#!/usr/bin/env bash
# Push to Hugging Face Space remote without docs/ and without tracked *.md (except optional
# root README.md). Uses git-filter-repo to remove those paths from *all commits* on a temp
# branch so HF does not reject pushes for historical binaries (e.g. PNGs under docs/).
#
# Requires: git-filter-repo (brew install git-filter-repo)
#
# Requires a clean working tree, or set HF_AUTO_STASH=1 to run `git stash -u` before and `git stash pop` after.
#
# Usage (repo root):
# ./scripts/push_hf_space_without_docs.sh
# HF_AUTO_STASH=1 ./scripts/push_hf_space_without_docs.sh
#
# Environment overrides:
# HF_REMOTE=hf HF_DEST_REF=main HF_SOURCE_BRANCH=main
# HF_KEEP_ROOT_README=0 # also remove README.md from history
# HF_AUTO_STASH=1
set -euo pipefail

# Run from the repository root so that docs/ and README.md resolve as
# top-level paths.
ROOT="$(git rev-parse --show-toplevel)"
cd "$ROOT"

# Configuration; each value can be overridden through its HF_* variable.
REMOTE="${HF_REMOTE:-hf}"                 # remote to push to
DEST_REF="${HF_DEST_REF:-main}"           # ref updated on that remote
SOURCE_BRANCH="${HF_SOURCE_BRANCH:-main}" # local branch that gets exported
KEEP_README="${HF_KEEP_ROOT_README:-1}"   # 1 = keep root README.md
AUTO_STASH="${HF_AUTO_STASH:-0}"          # 1 = stash a dirty tree automatically
DID_STASH=0                               # becomes 1 once this run has stashed

# Preflight checks come BEFORE any stashing: if one of them fails the script
# exits immediately, and the user's uncommitted work must not be left sitting
# in the stash as a side effect of a run that never started.
if ! git show-ref --verify --quiet "refs/heads/$SOURCE_BRANCH"; then
  echo "error: source branch '$SOURCE_BRANCH' does not exist." >&2
  exit 1
fi

if ! command -v git-filter-repo >/dev/null 2>&1; then
  echo "error: git-filter-repo not found. Install: brew install git-filter-repo" >&2
  echo " (HF rejects pushes while old commits still contain binaries under docs/.)" >&2
  exit 1
fi

# A dirty working tree is either stashed (opt-in) or treated as an error.
if [ -n "$(git status --porcelain 2>/dev/null)" ]; then
  if [ "$AUTO_STASH" = "1" ]; then
    echo "Working tree dirty — stashing (including untracked), then continuing…"
    git stash push -u -m "hf-space-push temp $(date +%s)"
    DID_STASH=1
  else
    echo "error: working tree not clean — commit or stash, then retry." >&2
    echo " Or run: HF_AUTO_STASH=1 $0" >&2
    exit 1
  fi
fi
# Throwaway branch whose history is rewritten and pushed; deleted afterwards.
TMP_BRANCH="hf-space-export-$(date +%s)"

# Where to return once the export is done. On a branch this is the branch
# name. In detached-HEAD state `rev-parse --abbrev-ref HEAD` would yield the
# literal string "HEAD", and `git checkout HEAD` would leave us on the temp
# branch, so the commit hash is used instead.
PREV_HEAD="$(git symbolic-ref --quiet --short HEAD 2>/dev/null \
  || git rev-parse --verify HEAD 2>/dev/null \
  || echo "$SOURCE_BRANCH")"

# Remember the remote URL so the remote can be re-created if it is missing
# after the rewrite.
HF_REMOTE_URL=""
if git remote get-url "$REMOTE" >/dev/null 2>&1; then
  HF_REMOTE_URL="$(git remote get-url "$REMOTE")"
fi

CLEANUP_DONE=0

# Switch back to the starting point (falling back to the source branch) and
# delete the temp branch. Returns non-zero only if neither checkout worked.
restore() {
  local rc=0
  git checkout "$PREV_HEAD" 2>/dev/null || git checkout "$SOURCE_BRANCH" || rc=$?
  git branch -D "$TMP_BRANCH" 2>/dev/null || true
  return "$rc"
}

# Undo everything this run changed locally: leave the temp branch, delete it,
# and pop the auto-stash. Idempotent, so it can be called explicitly on
# success and again from the EXIT trap. Returns non-zero if the original
# checkout could not be restored; the stash is then left in place rather than
# being popped onto the wrong branch.
cleanup() {
  if [ "$CLEANUP_DONE" = "1" ]; then
    return 0
  fi
  CLEANUP_DONE=1
  if ! restore; then
    echo "warning: could not switch back to '$PREV_HEAD' or '$SOURCE_BRANCH'." >&2
    if [ "$DID_STASH" = "1" ]; then
      echo "warning: your changes are still stashed — run git stash list" >&2
    fi
    return 1
  fi
  if [ "$DID_STASH" = "1" ]; then
    git stash pop || echo "warning: stash pop failed — check git status and git stash list" >&2
  fi
  return 0
}

# With `set -e`, a failing branch/checkout/filter-repo step exits at once.
# The trap makes sure that exit does not strand the user on the temp branch
# with their work still stashed.
trap 'cleanup || true' EXIT

# Start from a fresh copy of the source branch.
git branch -D "$TMP_BRANCH" 2>/dev/null || true
git branch "$TMP_BRANCH" "$SOURCE_BRANCH"
git checkout "$TMP_BRANCH"

echo "Rewriting branch $TMP_BRANCH (strip docs/ and markdown from full history)…"

# --refs restricts the rewrite to the temp branch; --invert-paths turns the
# --path list into the set of paths to REMOVE.
FILTER_ARGS=(--force --refs "refs/heads/$TMP_BRANCH" --invert-paths --path docs/)

# Add every currently tracked *.md file. NUL-delimited output (-z) keeps path
# names verbatim; without it git C-quotes unusual names and they would not
# match as --path arguments.
while IFS= read -r -d '' md_path; do
  [ -z "$md_path" ] && continue
  if [ "$KEEP_README" = "1" ] && [ "$md_path" = "README.md" ]; then
    continue
  fi
  FILTER_ARGS+=(--path "$md_path")
done < <(git ls-files -z -- '*.md')

# Also drop README.md from history when it is not tracked at the tip.
if [ "$KEEP_README" != "1" ]; then
  FILTER_ARGS+=(--path README.md)
fi

# NOTE(review): GIT_FILTER_BRANCH_SQUELCH_WARNING is a git filter-branch
# setting; presumably it has no effect on filter-repo — confirm before removing.
GIT_FILTER_BRANCH_SQUELCH_WARNING=1 git filter-repo "${FILTER_ARGS[@]}"

# Re-create the remote if the rewrite left it missing.
if [ -n "$HF_REMOTE_URL" ] && ! git remote get-url "$REMOTE" >/dev/null 2>&1; then
  git remote add "$REMOTE" "$HF_REMOTE_URL"
  echo "Re-added remote $REMOTE (git-filter-repo removed it)."
fi

# The rewritten history is unrelated to what the remote holds, hence --force.
if ! git push "$REMOTE" "$TMP_BRANCH:$DEST_REF" --force; then
  exit 1 # the EXIT trap restores the branch and pops the stash
fi

# Success: restore the local state now so that the OK line is the last output.
# If restoring fails, `set -e` ends the script with a non-zero status here.
cleanup

echo "OK: pushed to $REMOTE $DEST_REF (docs/ + *.md stripped from history; README kept: KEEP_README=$KEEP_README)."