Spaces:
Paused
Paused
# ─────────────────────────────────────────────────────────────────────────────
# deploy_changes.sh — Push local changes to GitHub + HuggingFace Space
#
# USAGE:
#   chmod +x deploy_changes.sh                     # one-time: make it executable
#   ./deploy_changes.sh "your message"             # commit + push to both remotes
#   ./deploy_changes.sh                            # uses default commit message
#   ./deploy_changes.sh "your message" --reset-db  # also clear vectorstore/ in the HF dataset first
#
# WHAT IT DOES (in order):
#   1. With --reset-db only: deletes the vectorstore/ files from the HF Hub dataset
#   2. Stages all modified tracked files (git add -u)
#   3. Commits with your message
#   4. Pushes to GitHub (origin → github.com/irajkooh/MultiModalRag)
#   5. Syncs tracked top-level data/ files and data/tables/ to the HF Hub dataset
#   6. Pushes to HF Space via a clean orphan branch — binary data files
#      (PDF, PNG, DOCX) are excluded from the Space push because HF Space
#      does not support Git LFS; those files live in the HF Dataset repo
#      irajkoohi/MultiModalRag_dataset and are downloaded at Space startup.
#
# DATA FILES (persistent across Space restarts):
#   - Add/remove files in data/ and run:
#       python3 -c "
#       from huggingface_hub import HfApi
#       import os, sys
#       api = HfApi(token=os.environ['HF_TOKEN'])
#       api.upload_file(path_or_fileobj=sys.argv[1],
#                       path_in_repo='data/'+os.path.basename(sys.argv[1]),
#                       repo_id='irajkoohi/MultiModalRag_dataset',
#                       repo_type='dataset')
#       " data/yourfile.pdf
#
# NOTES:
#   - Untracked new files are NOT staged automatically; run `git add <file>` first
#   - If GitHub push fails with "non-fast-forward", run:
#       git pull --rebase origin main && ./deploy_changes.sh "retry"
# ─────────────────────────────────────────────────────────────────────────────
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# ── Argument parsing ─────────────────────────────────────────────────────────
# The first argument that is not a recognised flag is the commit message, so
# --reset-db may come before or after the message (or on its own) without being
# mistaken for the message itself.
MSG=""
RESET_DB=false
for arg in "$@"; do
  if [[ "$arg" == "--reset-db" ]]; then
    RESET_DB=true
  elif [[ -z "$MSG" ]]; then
    MSG="$arg"
  fi
done
MSG="${MSG:-chore: update app}"

# ── Optional: wipe the remote vectorstore (--reset-db) ───────────────────────
# Best-effort: a missing token or a Hub error is reported but does not abort
# the deploy.
if [[ "$RESET_DB" == true ]]; then
  echo "▶ Clearing stale vectorstore from HF Hub dataset..."
  python3 - <<'PYEOF'
import os
import re
import sys

# Prefer the token from the environment; fall back to the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    # _secrets/HF_TOKEN.txt may hold other text too; take the first line that
    # consists solely of an hf_... token.
    try:
        with open("_secrets/HF_TOKEN.txt") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: reported by the check below.
        pass
if not token:
    print("⚠ HF token not found — skipping DB reset")
    sys.exit(0)

from huggingface_hub import HfApi

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"
try:
    files = [
        f for f in api.list_repo_files(repo, repo_type="dataset")
        if f.startswith("vectorstore/")
    ]
    for f in files:
        api.delete_file(path_in_repo=f, repo_id=repo, repo_type="dataset",
                        commit_message="reset vectorstore")
    print(f"✓ Cleared {len(files)} vectorstore file(s) from HF Hub dataset")
except Exception as e:
    # Top-level boundary of a best-effort step: report and carry on.
    print(f"✗ DB reset failed: {e}")
PYEOF
fi
# ── Commit tracked changes and push to GitHub ────────────────────────────────
# `git add -u` stages modifications and deletions of already-tracked files
# only; brand-new files must be added by hand beforehand.
echo "▶ Staging modified files..."
git add -u

# `git diff --cached --quiet` exits 0 when the index matches HEAD, i.e. there
# is nothing staged; in that case skip the commit but still push.
if git diff --cached --quiet; then
  echo "✓ Nothing to commit — working tree clean."
else
  echo "▶ Committing: \"$MSG\""
  git commit -m "$MSG"
fi

echo "▶ Pushing to GitHub (origin)..."
git push origin main
# ── Upload committed binary data files to HF Hub dataset ─────────────────────
# PDFs/DOCX/PNGs are excluded from the Space rsync (no Git LFS support).
# Uploading them here ensures sync_from_hf_hub() can download them on Space startup.
echo "▶ Syncing data files to HF Hub dataset (upload new + delete removed)..."
python3 - <<'PYEOF'
import os
import re
import subprocess
import sys
from pathlib import Path

# Prefer the token from the environment; fall back to the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    try:
        with open("_secrets/HF_TOKEN.txt") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: reported by the check below.
        pass
if not token:
    print("⚠ HF token not found — skipping data file sync to HF Hub")
    sys.exit(0)

from huggingface_hub import HfApi, CommitOperationAdd, CommitOperationDelete

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"
data_prefix = "data/"

# The tracked-file list drives deletions on the Hub, so it must be trustworthy:
#   * check=True — a failing git must not look like "no local files", which
#     would schedule every Hub data file for deletion;
#   * -z         — NUL-separated output is never C-quoted, so paths with
#     non-ASCII or special characters keep their real name and suffix.
try:
    result = subprocess.run(
        ["git", "ls-files", "-z", data_prefix],
        capture_output=True, text=True, check=True,
    )
except (OSError, subprocess.CalledProcessError) as e:
    print(f"✗ Could not list tracked data files ({e}) — skipping data file sync to HF Hub")
    sys.exit(0)
committed = [p for p in result.stdout.split("\0") if p]

# Top-level data files only (no subdirs like images/ or tables/)
sync_exts = {'.pdf', '.png', '.jpg', '.jpeg', '.docx', '.xlsx', '.txt'}
local_files = [
    f for f in committed
    if Path(f).suffix.lower() in sync_exts and '/' not in f[len(data_prefix):]
]
local_set = set(local_files)

# Files present on HF Hub dataset under data/ (top-level only)
hub_data_files = [
    f for f in api.list_repo_files(repo, repo_type="dataset")
    if f.startswith(data_prefix) and '/' not in f[len(data_prefix):]
]
# Hub files with no tracked local counterpart are considered stale.
stale_files = [f for f in hub_data_files if f not in local_set]

upload_ops = [CommitOperationAdd(path_in_repo=f, path_or_fileobj=f) for f in local_files]
delete_ops = [CommitOperationDelete(path_in_repo=f) for f in stale_files]
all_ops = upload_ops + delete_ops
if not all_ops:
    print(" Data files already in sync — nothing to do.")
    sys.exit(0)

# Uploads and deletions go into a single commit on the dataset repo.
try:
    api.create_commit(
        repo_id=repo,
        repo_type="dataset",
        operations=all_ops,
        commit_message="deploy: sync data files",
    )
    if upload_ops:
        print(f"✓ Uploaded {len(upload_ops)} file(s): {[Path(f).name for f in local_files]}")
    if delete_ops:
        print(f"🗑️ Deleted {len(delete_ops)} stale file(s) from HF Hub: {[Path(f).name for f in stale_files]}")
except Exception as e:
    # Top-level boundary of a best-effort step: report and carry on.
    print(f"✗ HF Hub data sync failed: {e}")
PYEOF
# ── Upload data/tables/ (SQLite DBs) to HF Hub dataset ───────────────────────
# Best-effort: a missing token, an empty folder or a Hub error is reported but
# does not abort the deploy.
echo "▶ Syncing data/tables/ to HF Hub dataset..."
python3 - <<'PYEOF'
import os
import re
import sys
from pathlib import Path

# Prefer the token from the environment; fall back to the local secrets file.
token = os.environ.get("MultiModalRag_Token", "").strip()
if not token:
    try:
        with open("_secrets/HF_TOKEN.txt") as f:
            for line in f:
                line = line.strip()
                if re.match(r'^hf_[A-Za-z0-9]+$', line):
                    token = line
                    break
    except (OSError, UnicodeError):
        # Missing or unreadable secrets file: reported by the check below.
        pass
if not token:
    print("⚠ HF token not found — skipping tables sync to HF Hub")
    sys.exit(0)

tables_dir = Path("data/tables")
if not tables_dir.exists() or not any(tables_dir.iterdir()):
    print(" data/tables/ is empty — skipping.")
    sys.exit(0)

from huggingface_hub import HfApi

api = HfApi(token=token)
repo = "irajkoohi/MultiModalRag_dataset"
try:
    # Local data/tables/ is uploaded under tables/ in the dataset repo;
    # lock files and macOS metadata are left out.
    api.upload_folder(
        folder_path=str(tables_dir),
        path_in_repo="tables",
        repo_id=repo,
        repo_type="dataset",
        commit_message="deploy: sync tables",
        ignore_patterns=["*.lock", ".DS_Store"],
    )
    print("✓ Uploaded data/tables/ to HF Hub dataset")
except Exception as e:
    # Top-level boundary of a best-effort step: report and carry on.
    print(f"✗ Tables sync failed: {e}")
PYEOF
# ── HF Space push via a temp directory (never touches working tree) ──────────
echo "▶ Building clean Space deploy branch (binary files excluded)..."

# Resolve the Space remote URL while still inside the real repository, so a
# missing `space` remote fails fast, before anything is copied.
_space_url="$(git remote get-url space)"

_tmpdir=$(mktemp -d)
# Remove the staging copy however the script ends: under `set -e` any failing
# step below would otherwise leave a full copy of the tree behind.
trap 'rm -rf "$_tmpdir"' EXIT

# Copy entire working tree to temp dir, excluding what doesn't belong on Space
rsync -a --exclude='.git' \
  --exclude='data/*.pdf' \
  --exclude='data/*.png' \
  --exclude='data/*.jpg' \
  --exclude='data/*.jpeg' \
  --exclude='data/*.docx' \
  --exclude='data/*.xlsx' \
  --exclude='data/images/' \
  --exclude='data/tables/' \
  --exclude='vectorstore/' \
  --exclude='vectorstore_corrupted_backup/' \
  --exclude='_secrets/' \
  --exclude='.venv/' \
  --exclude='__pycache__/' \
  --exclude='*.pyc' \
  . "$_tmpdir/"

# Build an orphan git repo in the temp dir and push it
pushd "$_tmpdir" > /dev/null
git init -q
git checkout -b space-deploy
git add -A
git commit -q -m "$MSG [space deploy]"
echo "▶ Force-pushing to HuggingFace Space..."
git remote add space "$_space_url"
# The deploy branch has no shared history with the Space's main, hence --force.
git push space space-deploy:main --force
popd > /dev/null

echo ""
echo "✓ Deployed successfully!"
echo " GitHub : https://github.com/irajkooh/MultiModalRag"
echo " Space : https://huggingface.co/spaces/irajkoohi/MultiModalRag"
echo " Dataset: https://huggingface.co/datasets/irajkoohi/MultiModalRag_dataset"