Spaces:
Sleeping
Sleeping
File size: 7,955 Bytes
#!/usr/bin/env python3
"""Sync the allowlisted repo files into a Hugging Face Space git repo."""
from __future__ import annotations
import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any
# Directory two levels above this file, i.e. the parent of the directory that
# contains this script. NOTE(review): assumes the script lives exactly one
# directory below the repository root -- confirm if the file is moved.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default location of the sync configuration; used as the default for --config.
DEFAULT_CONFIG_PATH = REPO_ROOT / ".hf-space-sync.json"
def run_git(args: list[str], cwd: Path, capture_output: bool = False) -> str:
    """Run a git command and raise on failure.

    Args:
        args: Arguments passed to ``git`` (without the leading ``git``).
        cwd: Directory in which the command is executed.
        capture_output: When true, stdout/stderr are captured and the stripped
            stdout is returned; otherwise git writes to the inherited streams.

    Returns:
        The stripped stdout when ``capture_output`` is true, otherwise ``""``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
            The ``user:password@`` part of any URL is masked in the
            exception's command and captured output, so printing or logging
            the error does not reveal credentials embedded in a remote URL.
    """
    import re  # local import: only needed on the failure path

    def redact(text: str) -> str:
        """Mask the userinfo component of every URL found in *text*."""
        return re.sub(r"(://)[^/@\s]+@", r"\1***@", text)

    try:
        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except subprocess.CalledProcessError as exc:
        # str(CalledProcessError) includes the full argv, which may carry an
        # authenticated remote URL. Re-raise the same exception type with the
        # credentials masked; ``from None`` keeps the unredacted original out
        # of any traceback.
        raise subprocess.CalledProcessError(
            exc.returncode,
            [redact(str(part)) for part in exc.cmd],
            output=None if exc.output is None else redact(exc.output),
            stderr=None if exc.stderr is None else redact(exc.stderr),
        ) from None
    return result.stdout.strip() if capture_output else ""
def load_config(path: Path) -> dict[str, Any]:
    """Load the Space sync configuration.

    Args:
        path: Location of the JSON configuration file (read as UTF-8).

    Returns:
        The parsed configuration mapping.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    with path.open("r", encoding="utf-8") as handle:
        config = json.load(handle)
    # json.load happily returns lists/strings/numbers; reject them here so the
    # caller gets a clear message instead of a TypeError on the first lookup.
    if not isinstance(config, dict):
        raise ValueError(
            f"Config {path} must contain a JSON object at the top level, "
            f"got {type(config).__name__}"
        )
    return config
def render_front_matter(values: dict[str, Any]) -> str:
    """Render a minimal YAML front matter block.

    Booleans are written as ``true``/``false`` and other numbers via ``str``.
    Every other value is converted with ``str`` and written as a plain scalar
    unless YAML would misread it, in which case it is emitted as a
    double-quoted scalar with ``"`` and ``\\`` escaped.

    Args:
        values: Mapping of front matter keys to scalar values, rendered in
            iteration order.

    Returns:
        The block delimited by ``---`` lines and terminated by a newline.
    """
    # Plain scalars that YAML resolves to a bool/null rather than a string.
    reserved_words = {"true", "false", "yes", "no", "on", "off", "null", "~"}

    def needs_quotes(text: str) -> bool:
        """Return True when *text* is not safe as a plain YAML scalar."""
        if not text or text != text.strip():
            return True
        if any(marker in text for marker in (":", "#", "\n", "\r", "\t")):
            return True
        # Leading indicator characters start another YAML construct.
        if text[0] in "-?!&*|>'\"%@`[]{},":
            return True
        if text.lower() in reserved_words:
            return True
        # Number-like strings (e.g. a version such as "5.10") would be parsed
        # as a float/int and lose information unless quoted.
        for parse in (float, lambda raw: int(raw, 0)):
            try:
                parse(text)
            except ValueError:
                continue
            return True
        return False

    lines = ["---"]
    for key, value in values.items():
        # bool must be tested before int/float because bool subclasses int.
        if isinstance(value, bool):
            rendered = "true" if value else "false"
        elif isinstance(value, (int, float)):
            rendered = str(value)
        else:
            text = str(value)
            # A JSON string literal is a valid YAML double-quoted scalar and
            # takes care of escaping embedded quotes and backslashes.
            rendered = json.dumps(text, ensure_ascii=False) if needs_quotes(text) else text
        lines.append(f"{key}: {rendered}")
    lines.append("---")
    lines.append("")
    return "\n".join(lines)
def resolve_included_files(repo_root: Path, patterns: list[str]) -> list[Path]:
    """Expand the include globs into a de-duplicated, sorted list of files.

    Each pattern is globbed relative to ``repo_root`` and only regular files
    are kept (as resolved paths). The result is ordered by each file's POSIX
    path relative to ``repo_root``.

    Raises:
        FileNotFoundError: If one or more patterns match no file; the message
            lists every such pattern.
    """
    collected: set[Path] = set()
    unmatched: list[str] = []

    for pattern in patterns:
        hits = {
            candidate.resolve()
            for candidate in repo_root.glob(pattern)
            if candidate.is_file()
        }
        if hits:
            collected |= hits
        else:
            unmatched.append(pattern)

    if unmatched:
        joined = ", ".join(unmatched)
        raise FileNotFoundError(f"No files matched include pattern(s): {joined}")

    def relative_posix(path: Path) -> str:
        return path.relative_to(repo_root).as_posix()

    return sorted(collected, key=relative_posix)
def build_readme_content(source: Path, generated_cfg: dict[str, Any]) -> str:
    """Return the README text with generated front matter applied.

    The source is read as UTF-8. When ``generated_cfg`` carries a non-empty
    ``prepend_front_matter`` mapping and the README does not already begin
    with a ``---`` line, the rendered front matter is placed in front of the
    text. In every other case the text is returned unchanged.
    """
    readme_text = source.read_text(encoding="utf-8")
    front_matter_values = generated_cfg.get("prepend_front_matter")
    if front_matter_values:
        header = render_front_matter(front_matter_values)
        # An existing front matter block in the source wins over the generated one.
        if not readme_text.startswith("---\n"):
            return header + readme_text
    return readme_text
def copy_allowlisted_files(
    repo_root: Path,
    target_root: Path,
    files: list[Path],
    generated: dict[str, Any],
) -> None:
    """Mirror each allowlisted file into the target tree.

    Every file keeps its path relative to ``repo_root``; missing parent
    directories are created under ``target_root``. A file named ``README.md``
    (case-insensitive) that has a truthy entry in ``generated`` under its
    relative POSIX path is written from ``build_readme_content``; all other
    files are copied with ``shutil.copy2``.
    """
    for src_path in files:
        rel_path = src_path.relative_to(repo_root)
        dest_path = target_root / rel_path
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        readme_rules = generated.get(rel_path.as_posix())
        is_generated_readme = bool(readme_rules) and rel_path.name.lower() == "readme.md"
        if not is_generated_readme:
            shutil.copy2(src_path, dest_path)
            continue

        rendered = build_readme_content(src_path, readme_rules)
        dest_path.write_text(rendered, encoding="utf-8")
def clear_target_tree(target_root: Path) -> None:
    """Remove everything from the target repo except the git metadata.

    The top-level ``.git`` entry is kept. Real directories are removed
    recursively; files and symbolic links (including links that point at
    directories) are unlinked, so a link's target is never touched.

    Args:
        target_root: Root of the checkout to empty.
    """
    # Snapshot the listing first so deletions cannot disturb the iteration.
    for child in list(target_root.iterdir()):
        if child.name == ".git":
            continue
        # is_dir() follows symlinks, and shutil.rmtree refuses to operate on a
        # symlink, so a link to a directory must be unlinked instead.
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink()
def current_commit_short_sha(repo_root: Path) -> str:
    """Return the abbreviated SHA of HEAD, or ``"working-tree"`` if git fails.

    The fallback is used only when ``git rev-parse --short HEAD`` exits with
    a non-zero status.
    """
    rev_parse = ["rev-parse", "--short", "HEAD"]
    try:
        short_sha = run_git(rev_parse, cwd=repo_root, capture_output=True)
    except subprocess.CalledProcessError:
        short_sha = "working-tree"
    return short_sha
def build_remote_url(space_repo: str, username: str, token: str) -> str:
    """Construct the authenticated HF Space git URL.

    Args:
        space_repo: Space identifier, inserted verbatim after ``/spaces/``.
        username: User name for the URL's userinfo component.
        token: Secret used as the password in the URL's userinfo component.

    Returns:
        An ``https://`` URL with percent-encoded credentials. The result
        contains the token, so it must not be printed or logged.
    """
    from urllib.parse import quote  # local import keeps the block self-contained

    # Characters such as "@", ":" or "/" in either credential would otherwise
    # be parsed as URL delimiters and corrupt the authority component.
    user = quote(username, safe="")
    secret = quote(token, safe="")
    return f"https://{user}:{secret}@huggingface.co/spaces/{space_repo}"
def sync_space(
    repo_root: Path,
    config_path: Path,
    token: str,
    username: str,
    commit_message: str | None,
    dry_run: bool,
) -> None:
    """Clone the HF Space repo, replace its contents with the allowlist, and push.

    Steps: load the config, resolve the include patterns, clone the Space
    into a temporary directory, delete everything except ``.git``, copy the
    allowlisted files in, and inspect ``git status --porcelain``. With no
    pending changes the function returns early. With ``dry_run`` the status
    is printed and nothing is committed. Otherwise the changes are committed
    and pushed to ``main``.

    Raises:
        KeyError: If the config has no ``space_repo`` entry.
        ValueError: If the config defines no include pattern.
    """
    settings = load_config(config_path)
    space_repo = str(settings["space_repo"])
    include_patterns = [str(entry) for entry in settings.get("include", [])]
    generated = dict(settings.get("generated", {}))
    if not include_patterns:
        raise ValueError("Config must define at least one include pattern")

    files = resolve_included_files(repo_root, include_patterns)
    source_sha = current_commit_short_sha(repo_root)
    if commit_message:
        message = commit_message
    else:
        message = f"Sync Space files from {source_sha}"
    remote_url = build_remote_url(space_repo=space_repo, username=username, token=token)

    with tempfile.TemporaryDirectory(prefix="hf-space-sync-") as scratch:
        checkout = Path(scratch)
        run_git(["clone", remote_url, str(checkout)], cwd=repo_root)
        clear_target_tree(checkout)
        copy_allowlisted_files(
            repo_root=repo_root,
            target_root=checkout,
            files=files,
            generated=generated,
        )

        pending = run_git(["status", "--porcelain"], cwd=checkout, capture_output=True)
        if not pending:
            print(f"No Hugging Face Space changes to push for {space_repo}.")
            return

        print(f"Prepared {len(files)} file(s) for Space sync to {space_repo}.")
        if dry_run:
            print("Dry run enabled; skipping commit and push.")
            print(pending)
            return

        # Commit identity first, then stage, commit and push -- in this order.
        publish_steps = [
            ["config", "user.name", "github-actions[bot]"],
            ["config", "user.email", "41898282+github-actions[bot]@users.noreply.github.com"],
            ["add", "--all"],
            ["commit", "-m", message],
            ["push", "origin", "HEAD:main"],
        ]
        for step in publish_steps:
            run_git(step, cwd=checkout)
        print(f"Pushed Space update to https://huggingface.co/spaces/{space_repo}")
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Defaults for ``--token`` and ``--username`` are taken from the
    environment when the corresponding variables are set and non-empty.
    """
    env = os.environ
    token_default = env.get("HF_TOKEN") or env.get("HUGGINGFACE_HUB_TOKEN") or ""
    username_default = env.get("HF_USERNAME") or "hf-token"

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = [
        (
            "--config",
            {
                "type": Path,
                "default": DEFAULT_CONFIG_PATH,
                "help": f"Path to the Space sync config (default: {DEFAULT_CONFIG_PATH})",
            },
        ),
        (
            "--token",
            {
                "default": token_default,
                "help": "Hugging Face write token. Defaults to HF_TOKEN or HUGGINGFACE_HUB_TOKEN.",
            },
        ),
        (
            "--username",
            {
                "default": username_default,
                "help": "Username to embed in the authenticated git URL (default: HF_USERNAME or hf-token).",
            },
        ),
        (
            "--commit-message",
            {
                "default": None,
                "help": "Optional custom commit message for the Space repo.",
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Prepare the target tree and print the pending git status without pushing.",
            },
        ),
    ]

    cli = argparse.ArgumentParser(description="Sync allowlisted files to a Hugging Face Space repo")
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main() -> int:
    """CLI entry point.

    Returns:
        ``0`` on success; ``1`` when no token is available or the sync raised.
        Failures are reported on stderr with the token masked.
    """
    args = parse_args()
    if not args.token:
        print("Missing Hugging Face token. Set HF_TOKEN or pass --token.", file=sys.stderr)
        return 1
    try:
        sync_space(
            repo_root=REPO_ROOT,
            config_path=args.config.resolve(),
            token=args.token,
            username=args.username,
            commit_message=args.commit_message,
            dry_run=args.dry_run,
        )
    except Exception as exc:
        # A failed git command stringifies with its full argv, which includes
        # the authenticated remote URL. Mask the token before it reaches
        # stderr or CI logs (args.token is known to be non-empty here).
        detail = str(exc).replace(args.token, "***")
        print(f"Space sync failed: {type(exc).__name__}: {detail}", file=sys.stderr)
        return 1
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|