"""Deploy Bee backend to HF Space `cuilabs/bee` via curated git push.

The Space's Dockerfile only consumes a subset of the repo. Pushing the
full monorepo (apps/, packages/, docs/, tests/, supabase/, ...) bloats
the Space's git history with ~140k lines that the Docker build ignores.

This script builds a focused deploy by:

1. Resolving the current `master` commit SHA.
2. Copying ONLY the paths the Dockerfile needs into a temp dir.
3. Initialising a fresh git repo there, committing as
   "HF Space backend deploy []".
4. Force-pushing to the space remote's `main` branch — HF Spaces build
   from the current tree, not the git history; force-push is correct
   (no commit data is lost; the source of truth is GitHub).
5. Cleaning up the temp dir.

The Space rebuild starts automatically after the push (~2-10 min,
visible at https://huggingface.co/spaces/cuilabs/bee).

Usage:
    python scripts/deploy_hf_space.py [--dry-run]

Authentication: the script reuses the credentials baked into the
`space` git remote (https://huggingface.co/spaces/cuilabs/bee). If
you've never pushed before, run `huggingface-cli login` first or set
HF_TOKEN in the environment so the http auth helper picks it up.

Curation list — kept in sync with the Dockerfile's COPY directives.
Update both when adding new runtime dependencies.
"""

from __future__ import annotations

import argparse
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# Repository root: this file lives in scripts/, so two .parent hops up.
ROOT = Path(__file__).resolve().parent.parent

# Files / dirs the Space's Dockerfile depends on. If you add a COPY in
# Dockerfile, also add the path here.
REQUIRED_PATHS = [
    "Dockerfile",
    "requirements.docker.txt",
    "requirements.txt",
    "README.md",
    ".env.example",
    "pyproject.toml",
    "bee",
    "scripts",
]

# Optional — present locally during dev, shipped only if they exist.
# (Note: the chat UI moved to bee/static/ in 770a763, so a top-level
# `static/` is no longer expected; the bee/ copy covers it.)
OPTIONAL_PATHS = [ "data/datasets", "data/rag_index", "data/lora_checkpoints", ] # Patterns to exclude when copying directories — keep the Space lean. IGNORE = shutil.ignore_patterns( "__pycache__", "*.pyc", "*.pyo", ".pytest_cache", ".DS_Store", ".mypy_cache", ".ruff_cache", "*.log", ".venv", "node_modules", ) # HF rejects pushes containing files larger than this (10 MiB). The Space # downloads its real artifacts (adapters, RAG indices) at runtime via # bee/hub_sync.py from HF Hub — pre-baked large files are dev-only # cruft that shouldn't be in the deploy. MAX_FILE_SIZE = 10 * 1024 * 1024 SPACE_REMOTE = "https://huggingface.co/spaces/cuilabs/bee" SPACE_BRANCH = "main" # confirmed via `git ls-remote space` # HF Spaces require YAML frontmatter at the top of README.md to set # the Space's config (sdk, port, title, etc.). Local README.md is the # marketing-facing doc and intentionally has no frontmatter — we inject # the Space-specific block at deploy time only. # # Without this, the Space lands in CONFIG_ERROR (cardData.sdk = None) # because HF re-reads cardData from README on every push. # # app_port: 7860 is the HF Spaces default and what the runtime actually # binds to regardless of what we set. The previous app_port: 8000 caused # RUNTIME_ERROR — HF's reverse proxy probed :8000 forever, container was # bound on :7860, healthcheck never reported healthy, Space killed at # the 30-min watchdog deadline. Verified against actual run logs of the # 5a22d328 deploy (2026-04-29). 
HF_SPACE_FRONTMATTER = """---
title: Bee Intelligence Engine
emoji: 🐝
colorFrom: yellow
colorTo: gray
sdk: docker
app_port: 7860
pinned: true
license: apache-2.0
short_description: The Intelligence Engine — domain LoRA adapters
---
"""


def run(cmd: list[str], cwd: Path) -> subprocess.CompletedProcess[str]:
    """Run *cmd* in *cwd*; raise CalledProcessError (with captured output) on failure."""
    return subprocess.run(cmd, cwd=cwd, check=True, capture_output=True, text=True)


def _copy_required(tmp: Path) -> None:
    """Copy every REQUIRED_PATHS entry into *tmp*; exit(2) if one is missing.

    README.md gets the HF Space frontmatter prepended — local README has
    no frontmatter (it's a public-facing doc), but HF Spaces need YAML at
    the top to know sdk/app_port/etc.
    """
    for rel in REQUIRED_PATHS:
        src = ROOT / rel
        if not src.exists():
            print(f" ✗ MISSING required path: {rel}")
            sys.exit(2)
        dst = tmp / rel
        dst.parent.mkdir(parents=True, exist_ok=True)
        if src.is_dir():
            shutil.copytree(src, dst, ignore=IGNORE)
        elif rel == "README.md":
            # Inject HF Space frontmatter only if not already present.
            content = src.read_text(encoding="utf-8")
            if not content.lstrip().startswith("---"):
                dst.write_text(HF_SPACE_FRONTMATTER + content, encoding="utf-8")
                print(f" + {rel} (with injected HF frontmatter)")
                continue
            shutil.copy2(src, dst)
        else:
            shutil.copy2(src, dst)
        print(f" + {rel}")


def _copy_optional(tmp: Path) -> None:
    """Copy OPTIONAL_PATHS entries into *tmp* when they exist locally."""
    for rel in OPTIONAL_PATHS:
        src = ROOT / rel
        if src.exists():
            dst = tmp / rel
            dst.parent.mkdir(parents=True, exist_ok=True)
            if src.is_dir():
                shutil.copytree(src, dst, ignore=IGNORE)
            else:
                shutil.copy2(src, dst)
            print(f" + {rel} (optional, present)")
        else:
            print(f" - {rel} (optional, not present, skipped)")


def _strip_large_files(tmp: Path) -> None:
    """Delete files >MAX_FILE_SIZE from *tmp* — HF rejects them at push time.

    Real artifacts (large adapters, RAG indices) are downloaded at Space
    startup via bee/hub_sync.py; baking them in is dev cruft.
    """
    stripped: list[tuple[Path, int]] = []
    for f in list(tmp.rglob("*")):
        if not f.is_file():
            continue
        size = f.stat().st_size  # stat once; original called it twice per file
        if size > MAX_FILE_SIZE:
            stripped.append((f, size))
            f.unlink()
    if stripped:
        print(f"\n stripped {len(stripped)} file(s) larger than {MAX_FILE_SIZE // (1024 * 1024)} MiB:")
        for f, size in stripped:
            rel = f.relative_to(tmp)
            print(f" - {rel} ({size / 1024 / 1024:.1f} MiB)")


def _git_push(tmp: Path, sha: str, full_sha: str) -> None:
    """Init a fresh repo in *tmp* and force-push it as the Space's main.

    Force is correct here: the Space's git is just a deploy surface —
    actual source-of-truth git history lives on GitHub. Exits with the
    git exit code (and prints stderr) if the push fails.
    """
    run(["git", "init", "-q", "--initial-branch=main"], cwd=tmp)
    run(["git", "config", "user.name", "Bee Deploy"], cwd=tmp)
    run(["git", "config", "user.email", "ops@cuilabs.io"], cwd=tmp)
    run(["git", "add", "-A"], cwd=tmp)
    run(
        ["git", "commit", "-q", "-m",
         f"HF Space backend deploy [{sha}]\n\nGitHub master: {full_sha}"],
        cwd=tmp,
    )
    run(["git", "remote", "add", "space", SPACE_REMOTE], cwd=tmp)
    # Push without check=True so we can surface stderr on failure instead
    # of a bare CalledProcessError traceback.
    push = subprocess.run(
        ["git", "push", "--force", "space", f"main:{SPACE_BRANCH}"],
        cwd=tmp,
        capture_output=True,
        text=True,
    )
    if push.returncode != 0:
        print(f" push failed:\n{push.stderr}", file=sys.stderr)
        sys.exit(push.returncode)
    print(f"\n pushed → {SPACE_REMOTE}:{SPACE_BRANCH}")
    print(" HF Space is rebuilding now. Verify at:")
    print(" https://huggingface.co/spaces/cuilabs/bee")
    print(" https://cuilabs-bee.hf.space/v1/adapters (404 → still building)")


def main() -> None:
    """Build the curated deploy tree and push it to the HF Space."""
    p = argparse.ArgumentParser()
    p.add_argument("--dry-run", action="store_true",
                   help="build the deploy tree but skip the push")
    args = p.parse_args()

    sha = run(["git", "rev-parse", "--short", "HEAD"], cwd=ROOT).stdout.strip()
    full_sha = run(["git", "rev-parse", "HEAD"], cwd=ROOT).stdout.strip()
    branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=ROOT).stdout.strip()
    print(f"deploying {sha} (branch {branch}) to {SPACE_REMOTE}:{SPACE_BRANCH}")

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = Path(tmp_dir)  # keep the str and Path as separate names
        _copy_required(tmp)
        _copy_optional(tmp)
        _strip_large_files(tmp)

        if args.dry_run:
            # Single walk for both count and byte total (was two rglob passes).
            files = [f for f in tmp.rglob("*") if f.is_file()]
            size = sum(f.stat().st_size for f in files)
            print(f"\n[dry-run] {len(files)} files, {size:,} bytes total. Skipping push.")
            return

        _git_push(tmp, sha, full_sha)


if __name__ == "__main__":
    main()