#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# deploy_changes.sh — Push local changes to GitHub + HuggingFace Space
#
# USAGE:
#   chmod +x deploy_changes.sh                     # one-time: make it executable
#   ./deploy_changes.sh "your message"             # commit + push to both remotes
#   ./deploy_changes.sh                            # uses default commit message
#   ./deploy_changes.sh "your message" --reset-db  # also clear vectorstore/ in the HF dataset first
#
# WHAT IT DOES (in order):
#   1. Stages all modified tracked files (git add -u)
#   2. Commits with your message
#   3. Pushes to GitHub (origin → github.com/irajkooh/MultiModalRag)
#   4. Syncs top-level data/ files and data/tables/ to the HF Dataset repo
#      irajkoohi/MultiModalRag_dataset
#   5. Pushes to HF Space via a clean orphan branch — binary data files
#      (PDF, PNG, DOCX) are excluded from the Space push because HF Space
#      does not support Git LFS; those files live in the HF Dataset repo
#      irajkoohi/MultiModalRag_dataset and are downloaded at Space startup.
#
# DATA FILES (persistent across Space restarts):
#   - Add/remove files in data/ and run:
#       python3 -c "
#       from huggingface_hub import HfApi
#       import os, sys
#       api = HfApi(token=os.environ['HF_TOKEN'])
#       api.upload_file(path_or_fileobj=sys.argv[1],
#                       path_in_repo='data/'+os.path.basename(sys.argv[1]),
#                       repo_id='irajkoohi/MultiModalRag_dataset',
#                       repo_type='dataset')
#       " data/yourfile.pdf
#
# NOTES:
#   - Untracked new files are NOT staged automatically; run `git add <file>` first
#   - If GitHub push fails with "non-fast-forward", run:
#       git pull --rebase origin main && ./deploy_changes.sh "retry"
# ─────────────────────────────────────────────────────────────────────────────
# Abort on any failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# _parse_args ARGS...
#   Splits the command line into the commit message and flags.
#     --reset-db   may appear in any position; sets RESET_DB=true
#     MSG          first argument that is not a flag; an empty or missing
#                  message falls back to "chore: update app"
#   Sets the globals MSG and RESET_DB that the rest of the script reads.
_parse_args() {
  local arg have_msg=false
  MSG=""
  RESET_DB=false
  for arg in "$@"; do
    # A plain if/elif (rather than `[[ … ]] && …`) keeps the loop's exit
    # status at 0, so it can never interact with `set -e`.
    if [[ "$arg" == "--reset-db" ]]; then
      RESET_DB=true
    elif [[ "$have_msg" == false ]]; then
      MSG="$arg"
      have_msg=true
    fi
  done
  MSG="${MSG:-"chore: update app"}"
}
_parse_args "$@"
# ── Optional: wipe vectorstore/ in the HF Hub dataset (--reset-db) ───────────
# Best-effort step: a missing token or a Hub error is reported and the deploy
# continues.
if $RESET_DB; then
  echo "▶ Clearing stale vectorstore from HF Hub dataset..."
  python3 - <<'PYEOF'
import os
import re
import sys

# Token lookup order: environment variable first, then the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    # Fall back to _secrets/HF_TOKEN.txt: use the first line that consists
    # solely of an hf_... token.
    try:
        with open("_secrets/HF_TOKEN.txt", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: fall through to the
        # "token not found" branch below.
        pass
if not token:
    print("⚠ HF token not found — skipping DB reset")
    sys.exit(0)

from huggingface_hub import HfApi, CommitOperationDelete

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"
try:
    files = [
        f for f in api.list_repo_files(repo, repo_type="dataset")
        if f.startswith("vectorstore/")
    ]
    if files:
        # One commit for all deletions instead of one commit per file.
        api.create_commit(
            repo_id=repo,
            repo_type="dataset",
            operations=[CommitOperationDelete(path_in_repo=f) for f in files],
            commit_message="reset vectorstore",
        )
    print(f"✅ Cleared {len(files)} vectorstore file(s) from HF Hub dataset")
except Exception as e:
    # Deliberately broad: any Hub/network error is reported, not fatal.
    print(f"❌ DB reset failed: {e}")
PYEOF
fi
# ── Stage, commit and push to GitHub ─────────────────────────────────────────
echo "▶ Staging modified files..."
# -u stages modifications and deletions of tracked files only; new untracked
# files must be added manually beforehand.
git add -u

# Commit only when the index differs from HEAD. `git commit` fails when there
# is nothing staged, which would abort the script under `set -e`.
if git diff --cached --quiet; then
  echo "✅ Nothing to commit — working tree clean."
else
  echo "▶ Committing: \"$MSG\""
  git commit -m "$MSG"
fi

echo "▶ Pushing to GitHub (origin)..."
git push origin main
# ── Upload committed binary data files to HF Hub dataset ─────────────────────
# PDFs/DOCX/PNGs are excluded from the Space rsync (no Git LFS support).
# Uploading them here ensures sync_from_hf_hub() can download them on Space startup.
# Best-effort step: failures are reported and the deploy continues.
echo "▶ Syncing data files to HF Hub dataset (upload new + delete removed)..."
python3 - <<'PYEOF'
import os
import re
import subprocess
import sys
from pathlib import Path

# Token lookup order: environment variable first, then the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    try:
        with open("_secrets/HF_TOKEN.txt", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: handled by the check below.
        pass
if not token:
    print("⚠ HF token not found — skipping data file sync to HF Hub")
    sys.exit(0)

from huggingface_hub import HfApi, CommitOperationAdd, CommitOperationDelete

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"

# List files tracked by git under data/.
#  * -z yields NUL-separated, unquoted paths, so names containing non-ASCII
#    characters or spaces are returned verbatim.
#  * check=True: a git failure must NOT be read as "no local files",
#    otherwise every data file on the Hub would be scheduled for deletion.
try:
    result = subprocess.run(
        ["git", "ls-files", "-z", "data/"],
        capture_output=True, text=True, check=True,
    )
except (OSError, subprocess.CalledProcessError) as e:
    print(f"❌ HF Hub data sync failed: {e}")
    sys.exit(0)
committed = [p for p in result.stdout.split("\0") if p]

# Top-level data files only (no subdirs like images/ or tables/)
sync_exts = {'.pdf', '.png', '.jpg', '.jpeg', '.docx', '.xlsx', '.txt'}
local_files = [
    f for f in committed
    if Path(f).suffix.lower() in sync_exts and '/' not in f[len("data/"):]
]
local_set = set(local_files)

try:
    # Files present on HF Hub dataset under data/ (top-level only)
    hub_data_files = [
        f for f in api.list_repo_files(repo, repo_type="dataset")
        if f.startswith("data/") and '/' not in f[len("data/"):]
    ]
    upload_ops = [CommitOperationAdd(path_in_repo=f, path_or_fileobj=f) for f in local_files]
    # Anything on the Hub that is no longer tracked locally is stale.
    stale = [f for f in hub_data_files if f not in local_set]
    delete_ops = [CommitOperationDelete(path_in_repo=f) for f in stale]
    all_ops = upload_ops + delete_ops
    if not all_ops:
        print("  Data files already in sync — nothing to do.")
        sys.exit(0)  # SystemExit is not an Exception, so it is not swallowed below
    # Uploads and deletions go into a single commit.
    api.create_commit(
        repo_id=repo,
        repo_type="dataset",
        operations=all_ops,
        commit_message="deploy: sync data files",
    )
except Exception as e:
    # Deliberately broad: any Hub/network error is reported, not fatal.
    print(f"❌ HF Hub data sync failed: {e}")
else:
    if upload_ops:
        print(f"✅ Uploaded {len(upload_ops)} file(s): {[Path(f).name for f in local_files]}")
    if delete_ops:
        to_del = [Path(f).name for f in stale]
        print(f"🗑️  Deleted {len(delete_ops)} stale file(s) from HF Hub: {to_del}")
PYEOF
# ── Upload data/tables/ (SQLite DBs) to HF Hub dataset ───────────────────────
# The local folder data/tables/ is uploaded under tables/ in the dataset repo.
# Best-effort step: failures are reported and the deploy continues.
echo "▶ Syncing data/tables/ to HF Hub dataset..."
python3 - <<'PYEOF'
import os
import re
import sys
from pathlib import Path

# Token lookup order: environment variable first, then the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    try:
        with open("_secrets/HF_TOKEN.txt", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: handled by the check below.
        pass
if not token:
    print("⚠ HF token not found — skipping tables sync to HF Hub")
    sys.exit(0)

tables_dir = Path("data/tables")
# is_dir() rather than exists(): iterdir() would raise on a non-directory.
if not tables_dir.is_dir() or not any(tables_dir.iterdir()):
    print("  data/tables/ is empty — skipping.")
    sys.exit(0)

from huggingface_hub import HfApi

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"
try:
    api.upload_folder(
        folder_path=str(tables_dir),
        path_in_repo="tables",
        repo_id=repo,
        repo_type="dataset",
        commit_message="deploy: sync tables",
        # Skip lock files and macOS folder metadata.
        ignore_patterns=["*.lock", ".DS_Store"],
    )
    print("✅ Uploaded data/tables/ to HF Hub dataset")
except Exception as e:
    # Deliberately broad: any Hub/network error is reported, not fatal.
    print(f"❌ Tables sync failed: {e}")
PYEOF
# ── HF Space push via a temp directory (never touches working tree) ──────────
echo "▶ Building clean Space deploy branch (binary files excluded)..."

# Resolve the Space remote while still inside the real repository. As a plain
# assignment, a missing `space` remote aborts the script here (set -e)
# instead of surfacing later as an empty URL.
_space_url="$(git remote get-url space)"

_tmpdir="$(mktemp -d)"
# Remove the temp copy on every exit path, including failures under set -e.
trap 'rm -rf "$_tmpdir"' EXIT

# Copy entire working tree to temp dir, excluding what doesn't belong on Space.
# NOTE(review): this copies the working tree, not the committed tree, so
# untracked and git-ignored files that match none of the excludes below are
# published to the Space — confirm no secrets live outside _secrets/.
rsync -a --exclude='.git' \
  --exclude='data/*.pdf' \
  --exclude='data/*.png' \
  --exclude='data/*.jpg' \
  --exclude='data/*.jpeg' \
  --exclude='data/*.docx' \
  --exclude='data/*.xlsx' \
  --exclude='data/images/' \
  --exclude='data/tables/' \
  --exclude='vectorstore/' \
  --exclude='vectorstore_corrupted_backup/' \
  --exclude='_secrets/' \
  --exclude='.venv/' \
  --exclude='__pycache__/' \
  --exclude='*.pyc' \
  . "$_tmpdir/"

# Build an orphan git repo in the temp dir and push it. The fresh repo has no
# history, so the Space receives exactly one commit.
pushd "$_tmpdir" > /dev/null
git init -q
git checkout -b space-deploy
git add -A
git commit -q -m "$MSG [space deploy]"
echo "▶ Force-pushing to HuggingFace Space..."
git remote add space "$_space_url"
# --force: the orphan commit shares no history with the Space's main branch.
git push space space-deploy:main --force
popd > /dev/null

echo ""
echo "✅ Deployed successfully!"
echo " GitHub : https://github.com/irajkooh/MultiModalRag"
echo " Space : https://huggingface.co/spaces/irajkoohi/MultiModalRag"
echo " Dataset: https://huggingface.co/datasets/irajkoohi/MultiModalRag_dataset"