Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Sync the allowlisted repo files into a Hugging Face Space git repo.""" | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| import shutil | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| from typing import Any | |
# Repository root: two levels up from this file (the parent of the directory
# that contains this script).
REPO_ROOT = Path(__file__).resolve().parents[1]

# Config file used when --config is not given on the command line.
DEFAULT_CONFIG_PATH = REPO_ROOT.joinpath(".hf-space-sync.json")
def run_git(args: list[str], cwd: Path, capture_output: bool = False) -> str:
    """Execute ``git <args>`` inside *cwd*.

    Args:
        args: Arguments placed after the ``git`` executable name.
        cwd: Working directory for the git process.
        capture_output: When true, stdout/stderr are captured instead of
            being inherited from this process.

    Returns:
        The whitespace-stripped stdout when *capture_output* is true,
        otherwise an empty string.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ["git"] + list(args)
    completed = subprocess.run(
        command,
        cwd=cwd,
        check=True,  # non-zero exit becomes CalledProcessError
        text=True,
        capture_output=capture_output,
    )
    if not capture_output:
        # Nothing was captured, so there is no output to hand back.
        return ""
    return completed.stdout.strip()
def load_config(path: Path) -> dict[str, Any]:
    """Read the Space sync configuration from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
# Plain scalars that YAML parsers commonly resolve to null/bool, not a string.
_YAML_AMBIGUOUS_WORDS = frozenset(
    {"", "~", "null", "true", "false", "yes", "no", "on", "off", "y", "n"}
)
# Characters that act as YAML indicators when they start a plain scalar.
_YAML_LEADING_INDICATORS = frozenset("!&*-?[]{}|>@`\"'#%,")


def _yaml_string_needs_quotes(text: str) -> bool:
    """Return True when *text* is not safe to emit as a bare YAML scalar."""
    if text != text.strip() or text.lower() in _YAML_AMBIGUOUS_WORDS:
        return True
    if text[0] in _YAML_LEADING_INDICATORS:
        return True
    if ":" in text or " #" in text or any(ch in text for ch in "\n\r\t"):
        return True
    # Strings that look like numbers ("3.10", "7860", "0x1F") would be loaded
    # back as float/int and lose information (e.g. "3.10" -> 3.1).
    try:
        float(text)
    except ValueError:
        pass
    else:
        return True
    try:
        int(text, 0)
    except ValueError:
        return False
    return True


def _render_yaml_value(value: Any) -> str:
    """Render one front matter value as a single-line YAML scalar or flow node."""
    if value is None:
        return "null"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (list, tuple, dict)):
        # JSON collections are valid YAML flow sequences/mappings.
        return json.dumps(value, ensure_ascii=False)
    text = str(value)
    if _yaml_string_needs_quotes(text):
        # A JSON string literal is a valid YAML double-quoted scalar and takes
        # care of escaping embedded quotes and backslashes.
        return json.dumps(text, ensure_ascii=False)
    return text


def render_front_matter(values: dict[str, Any]) -> str:
    """Render a minimal YAML front matter block.

    Simple strings are emitted bare. Strings are double-quoted (with escaping)
    when they contain a colon, start with a YAML indicator character, carry
    surrounding whitespace, or would otherwise be read back as a number,
    boolean or null. ``None`` becomes ``null`` and lists/dicts are emitted in
    JSON flow style.

    Args:
        values: Mapping of front matter keys to values, in output order.

    Returns:
        The block delimited by ``---`` lines and terminated by a newline.
    """
    lines = ["---"]
    lines.extend(f"{key}: {_render_yaml_value(value)}" for key, value in values.items())
    lines.append("---")
    lines.append("")  # makes the joined text end with a newline
    return "\n".join(lines)
def resolve_included_files(repo_root: Path, patterns: list[str]) -> list[Path]:
    """Expand include globs into a de-duplicated, deterministically ordered list.

    Args:
        repo_root: Directory the glob patterns are evaluated against.
        patterns: Glob patterns relative to *repo_root*.

    Returns:
        Resolved paths of every matched regular file, sorted by their POSIX
        path relative to *repo_root*.

    Raises:
        FileNotFoundError: If one or more patterns match no file; the message
            lists every such pattern.
    """
    resolved: set[Path] = set()
    unmatched: list[str] = []

    for pattern in patterns:
        hits = {
            candidate.resolve()
            for candidate in repo_root.glob(pattern)
            if candidate.is_file()
        }
        if hits:
            resolved |= hits
        else:
            unmatched.append(pattern)

    # Report all unmatched patterns at once rather than failing on the first.
    if unmatched:
        raise FileNotFoundError(
            f"No files matched include pattern(s): {', '.join(unmatched)}"
        )

    def relative_posix(path: Path) -> str:
        return path.relative_to(repo_root).as_posix()

    return sorted(resolved, key=relative_posix)
def build_readme_content(source: Path, generated_cfg: dict[str, Any]) -> str:
    """Return the README text, optionally prefixed with generated front matter.

    Args:
        source: README file to read (decoded as UTF-8).
        generated_cfg: Generation rules; only the ``prepend_front_matter``
            key is consulted.

    Returns:
        The source text unchanged when no front matter is configured or when
        the text already begins with a ``---`` line; otherwise the rendered
        front matter followed by the source text.
    """
    body = source.read_text(encoding="utf-8")
    front_matter_values = generated_cfg.get("prepend_front_matter")
    if front_matter_values:
        header = render_front_matter(front_matter_values)
        # An existing front matter block in the source wins over the generated one.
        if not body.startswith("---\n"):
            return header + body
    return body
def copy_allowlisted_files(
    repo_root: Path,
    target_root: Path,
    files: list[Path],
    generated: dict[str, Any],
) -> None:
    """Mirror the allowlisted files from the source repo into the target tree.

    Args:
        repo_root: Root that every path in *files* lives under.
        target_root: Directory receiving the copies; sub-directories are
            created as needed.
        files: Source files to copy.
        generated: Per-file generation rules keyed by POSIX path relative to
            *repo_root*.

    A file named ``readme.md`` (case-insensitive) that has a non-empty entry
    in *generated* is written from ``build_readme_content``; every other file
    is copied with ``shutil.copy2``.
    """
    for src in files:
        rel_path = src.relative_to(repo_root)
        dest = target_root.joinpath(rel_path)
        dest.parent.mkdir(parents=True, exist_ok=True)

        readme_rules = generated.get(rel_path.as_posix())
        is_generated_readme = bool(readme_rules) and rel_path.name.lower() == "readme.md"
        if not is_generated_readme:
            shutil.copy2(src, dest)
            continue

        dest.write_text(build_readme_content(src, readme_rules), encoding="utf-8")
def clear_target_tree(target_root: Path) -> None:
    """Remove every entry directly under *target_root* except ``.git``.

    Real directories are removed recursively. Files and symbolic links,
    including links that point at directories, are unlinked, so a link's
    target is never followed or deleted.

    Args:
        target_root: Root of the cloned target repository.
    """
    for child in target_root.iterdir():
        if child.name == ".git":
            continue
        # is_dir() follows symlinks, but shutil.rmtree refuses to operate on a
        # symlink, so a link to a directory has to go through unlink().
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink()
def current_commit_short_sha(repo_root: Path) -> str:
    """Return the abbreviated SHA of ``HEAD`` in *repo_root*.

    Falls back to the label ``"working-tree"`` when ``git rev-parse`` exits
    with a non-zero status.
    """
    try:
        sha = run_git(
            ["rev-parse", "--short", "HEAD"],
            cwd=repo_root,
            capture_output=True,
        )
    except subprocess.CalledProcessError:
        sha = "working-tree"
    return sha
def build_remote_url(space_repo: str, username: str, token: str) -> str:
    """Construct the authenticated HF Space git URL.

    The username and token are percent-encoded so that reserved characters
    such as ``@``, ``:`` or ``/`` cannot break the userinfo part of the URL.
    Values made only of letters, digits and ``-._~`` are left unchanged.

    Args:
        space_repo: Space identifier placed after ``/spaces/`` in the URL.
        username: Username for the userinfo component.
        token: Secret for the userinfo component.

    Returns:
        An ``https://user:token@huggingface.co/spaces/<space_repo>`` URL.
    """
    from urllib.parse import quote

    # safe="" also encodes "/", which quote() would otherwise leave as-is.
    user = quote(username, safe="")
    secret = quote(token, safe="")
    return f"https://{user}:{secret}@huggingface.co/spaces/{space_repo}"
def sync_space(
    repo_root: Path,
    config_path: Path,
    token: str,
    username: str,
    commit_message: str | None,
    dry_run: bool,
) -> None:
    """Clone the HF Space repo, replace its allowlisted files, and push.

    Args:
        repo_root: Source repository the include patterns are resolved against.
        config_path: JSON config providing ``space_repo``, ``include`` and
            optionally ``generated``.
        token: Secret embedded in the authenticated clone URL.
        username: Username embedded in the authenticated clone URL.
        commit_message: Commit message for the Space repo; when empty or
            ``None`` a message naming the source commit is used.
        dry_run: When true, print the pending ``git status`` and stop before
            committing or pushing.

    Raises:
        KeyError: If the config has no ``space_repo`` entry.
        ValueError: If the config defines no include patterns.
        FileNotFoundError: If an include pattern matches no file.
        subprocess.CalledProcessError: If a git command fails. For a failed
            clone the reported command carries the credential-free Space URL.
    """
    config = load_config(config_path)
    space_repo = str(config["space_repo"])
    include_patterns = [str(pattern) for pattern in config.get("include", [])]
    generated = dict(config.get("generated", {}))

    if not include_patterns:
        raise ValueError("Config must define at least one include pattern")

    files = resolve_included_files(repo_root, include_patterns)
    source_sha = current_commit_short_sha(repo_root)
    message = commit_message or f"Sync Space files from {source_sha}"
    remote_url = build_remote_url(space_repo=space_repo, username=username, token=token)

    with tempfile.TemporaryDirectory(prefix="hf-space-sync-") as tmp_dir_str:
        tmp_dir = Path(tmp_dir_str)
        try:
            run_git(["clone", remote_url, str(tmp_dir)], cwd=repo_root)
        except subprocess.CalledProcessError as exc:
            # CalledProcessError stringifies its full argv, which here holds
            # the token-bearing URL. Re-raise the same exception type with a
            # credential-free URL, and drop the chained original, so the
            # secret cannot reach logs or tracebacks.
            public_url = f"https://huggingface.co/spaces/{space_repo}"
            raise subprocess.CalledProcessError(
                exc.returncode,
                ["git", "clone", public_url, str(tmp_dir)],
                output=exc.output,
                stderr=exc.stderr,
            ) from None

        # Rebuild the working tree from scratch so files dropped from the
        # allowlist show up as deletions in the Space repo.
        clear_target_tree(tmp_dir)
        copy_allowlisted_files(repo_root=repo_root, target_root=tmp_dir, files=files, generated=generated)

        status = run_git(["status", "--porcelain"], cwd=tmp_dir, capture_output=True)
        if not status:
            print(f"No Hugging Face Space changes to push for {space_repo}.")
            return

        print(f"Prepared {len(files)} file(s) for Space sync to {space_repo}.")
        if dry_run:
            print("Dry run enabled; skipping commit and push.")
            print(status)
            return

        # Commit identity is set on the temporary clone only.
        run_git(["config", "user.name", "github-actions[bot]"], cwd=tmp_dir)
        run_git(["config", "user.email", "41898282+github-actions[bot]@users.noreply.github.com"], cwd=tmp_dir)
        run_git(["add", "--all"], cwd=tmp_dir)
        run_git(["commit", "-m", message], cwd=tmp_dir)
        run_git(["push", "origin", "HEAD:main"], cwd=tmp_dir)
        print(f"Pushed Space update to https://huggingface.co/spaces/{space_repo}")
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Defaults for ``--token`` and ``--username`` are taken from the
    environment (``HF_TOKEN`` / ``HUGGINGFACE_HUB_TOKEN`` and ``HF_USERNAME``).
    """
    env = os.environ
    token_default = env.get("HF_TOKEN") or env.get("HUGGINGFACE_HUB_TOKEN") or ""
    username_default = env.get("HF_USERNAME") or "hf-token"

    cli = argparse.ArgumentParser(description="Sync allowlisted files to a Hugging Face Space repo")
    cli.add_argument(
        "--config",
        type=Path,
        default=DEFAULT_CONFIG_PATH,
        help=f"Path to the Space sync config (default: {DEFAULT_CONFIG_PATH})",
    )
    cli.add_argument(
        "--token",
        default=token_default,
        help="Hugging Face write token. Defaults to HF_TOKEN or HUGGINGFACE_HUB_TOKEN.",
    )
    cli.add_argument(
        "--username",
        default=username_default,
        help="Username to embed in the authenticated git URL (default: HF_USERNAME or hf-token).",
    )
    cli.add_argument(
        "--commit-message",
        default=None,
        help="Optional custom commit message for the Space repo.",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Prepare the target tree and print the pending git status without pushing.",
    )
    return cli.parse_args()
def main() -> int:
    """Run the CLI and translate the outcome into a process exit code.

    Returns:
        ``0`` on success; ``1`` when no token is available or the sync raises.
    """
    args = parse_args()

    if not args.token:
        print("Missing Hugging Face token. Set HF_TOKEN or pass --token.", file=sys.stderr)
        return 1

    exit_code = 0
    try:
        sync_space(
            repo_root=REPO_ROOT,
            config_path=args.config.resolve(),
            token=args.token,
            username=args.username,
            commit_message=args.commit_message,
            dry_run=args.dry_run,
        )
    except Exception as exc:
        # Top-level boundary: report any failure on stderr instead of a traceback.
        failure = f"Space sync failed: {type(exc).__name__}: {exc}"
        print(failure, file=sys.stderr)
        exit_code = 1
    return exit_code
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())