openra-rl-challenge / scripts /push_hf_space.py
github-actions[bot]
Sync Space files from 04ee2ab23ee580fa351550303a8efd99a52df7e2
82d84b1
#!/usr/bin/env python3
"""Sync the allowlisted repo files into a Hugging Face Space git repo."""
from __future__ import annotations
import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any
# Directory two levels above this file, i.e. the parent of the directory that
# contains this script; used as the root for include globs and git commands.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default path of the sync configuration (parsed as JSON by load_config).
DEFAULT_CONFIG_PATH = REPO_ROOT / ".hf-space-sync.json"
def _redact_url_credentials(text: str) -> str:
    """Replace the ``user:password@`` part of any URL in *text* with ``***@``."""
    import re  # local import: ``re`` is not in the module's import block

    return re.sub(r"(?<=://)[^/@\s]+@", "***@", text)


def run_git(args: list[str], cwd: Path, capture_output: bool = False) -> str:
    """Run a git command and raise on failure.

    Args:
        args: Arguments placed after ``git`` on the command line.
        cwd: Working directory the command runs in.
        capture_output: When true, stdout/stderr are captured and the stripped
            stdout is returned; otherwise git writes to the inherited streams.

    Returns:
        The stripped stdout when ``capture_output`` is true, else ``""``.

    Raises:
        subprocess.CalledProcessError: If git exits non-zero. Credentials
            embedded in URLs (``https://user:token@host``) are redacted from
            the exception's command and captured output, so that printing or
            logging the error cannot leak an access token.
    """
    try:
        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except subprocess.CalledProcessError as exc:
        safe_cmd = [_redact_url_credentials(str(part)) for part in exc.cmd]
        safe_stdout = (
            _redact_url_credentials(exc.stdout) if isinstance(exc.stdout, str) else exc.stdout
        )
        safe_stderr = (
            _redact_url_credentials(exc.stderr) if isinstance(exc.stderr, str) else exc.stderr
        )
        # ``from None`` drops the original exception (which still holds the
        # unredacted command) from the traceback chain.
        raise subprocess.CalledProcessError(
            exc.returncode,
            safe_cmd,
            output=safe_stdout,
            stderr=safe_stderr,
        ) from None
    return result.stdout.strip() if capture_output else ""
def load_config(path: Path) -> dict[str, Any]:
    """Read the Space sync configuration from a UTF-8 JSON file.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        The parsed JSON document.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
# Bare words that YAML parsers commonly resolve to null/booleans rather than strings.
_YAML_RESERVED_WORDS = frozenset({"", "~", "null", "true", "false", "yes", "no", "on", "off"})


def _needs_yaml_quotes(text: str) -> bool:
    """Return True when *text* would not survive as a plain (unquoted) YAML string."""
    if text != text.strip() or text.lower() in _YAML_RESERVED_WORDS:
        return True
    # ":" can start a mapping, "#" a comment, and quotes/newlines break the line.
    if any(marker in text for marker in (":", "#", '"', "\n", "\r")):
        return True
    # Leading YAML indicator characters change how the scalar is parsed.
    if text[0] in "-?,[]{}&*!|>'%@`":
        return True
    # Number-like strings (e.g. "3.10") would be re-typed as numbers.
    try:
        float(text)
    except ValueError:
        return False
    return True


def render_front_matter(values: dict[str, Any]) -> str:
    """Render a minimal YAML front matter block.

    Each entry becomes one ``key: value`` line between two ``---`` fences.
    Booleans render as ``true``/``false`` and numbers via ``str``. Lists,
    tuples and dicts render as JSON, which is valid YAML flow syntax. Any
    other value is converted with ``str`` and emitted bare when that is safe,
    otherwise as a JSON-escaped double-quoted string.

    Args:
        values: Mapping of front matter keys to values, in output order.

    Returns:
        The front matter text, ending with a newline after the closing fence.
    """
    lines = ["---"]
    for key, value in values.items():
        if isinstance(value, bool):  # must precede the int check: bool is an int
            rendered = "true" if value else "false"
        elif isinstance(value, (int, float)):
            rendered = str(value)
        elif isinstance(value, (list, tuple, dict)):
            rendered = json.dumps(value, ensure_ascii=False)
        else:
            text = str(value)
            # json.dumps escapes embedded quotes/backslashes/control characters;
            # the result is also a valid YAML double-quoted scalar.
            rendered = json.dumps(text, ensure_ascii=False) if _needs_yaml_quotes(text) else text
        lines.append(f"{key}: {rendered}")
    lines.append("---")
    lines.append("")
    return "\n".join(lines)
def resolve_included_files(repo_root: Path, patterns: list[str]) -> list[Path]:
    """Expand include globs into a de-duplicated, deterministically ordered file list.

    Args:
        repo_root: Directory the glob patterns are evaluated against.
        patterns: Glob patterns relative to ``repo_root``.

    Returns:
        Resolved paths of every matching regular file, sorted by their
        POSIX-style path relative to ``repo_root``.

    Raises:
        FileNotFoundError: If one or more patterns match no file; the message
            lists every such pattern.
    """
    resolved: set[Path] = set()
    unmatched: list[str] = []

    for glob_pattern in patterns:
        hits = {
            candidate.resolve()
            for candidate in repo_root.glob(glob_pattern)
            if candidate.is_file()
        }
        if hits:
            resolved |= hits
        else:
            unmatched.append(glob_pattern)

    if unmatched:
        joined = ", ".join(unmatched)
        raise FileNotFoundError(f"No files matched include pattern(s): {joined}")

    def relative_posix(entry: Path) -> str:
        return entry.relative_to(repo_root).as_posix()

    return sorted(resolved, key=relative_posix)
def build_readme_content(source: Path, generated_cfg: dict[str, Any]) -> str:
    """Return the README text, with generated front matter prepended when configured.

    Args:
        source: README file to read (UTF-8).
        generated_cfg: Generation rules; only ``prepend_front_matter`` is read.

    Returns:
        The source text unchanged when no front matter is configured or the
        text already begins with a ``---`` line; otherwise the rendered front
        matter followed by the source text.
    """
    body = source.read_text(encoding="utf-8")
    configured = generated_cfg.get("prepend_front_matter")
    if not configured:
        return body

    header = render_front_matter(configured)
    # An existing front matter block in the source takes precedence.
    already_has_header = body.startswith("---\n")
    return body if already_has_header else header + body
def copy_allowlisted_files(
    repo_root: Path,
    target_root: Path,
    files: list[Path],
    generated: dict[str, Any],
) -> None:
    """Mirror each allowlisted file into the target tree at the same relative path.

    Args:
        repo_root: Root that every entry of ``files`` lives under.
        target_root: Destination directory; parent folders are created as needed.
        files: Source files to copy.
        generated: Generation rules keyed by POSIX-style relative path. A file
            named ``readme.md`` (case-insensitive) with a non-empty rule is
            written from ``build_readme_content``; everything else is copied
            with ``shutil.copy2``.
    """
    for src in files:
        rel_path = src.relative_to(repo_root)
        dest = target_root / rel_path
        dest.parent.mkdir(parents=True, exist_ok=True)

        rules = generated.get(rel_path.as_posix())
        is_generated_readme = bool(rules) and rel_path.name.lower() == "readme.md"
        if not is_generated_readme:
            shutil.copy2(src, dest)
            continue

        dest.write_text(build_readme_content(src, rules), encoding="utf-8")
def clear_target_tree(target_root: Path) -> None:
    """Remove everything directly under ``target_root`` except ``.git``.

    Real directories are removed recursively. Files and symbolic links
    (including links that point at directories) are unlinked, so a link's
    target is never followed or deleted.

    Args:
        target_root: Directory to empty.
    """
    # Snapshot the listing first so deletions cannot disturb the iteration.
    for child in list(target_root.iterdir()):
        if child.name == ".git":
            continue
        # is_dir() follows symlinks, and shutil.rmtree refuses to operate on a
        # symlink, so links to directories must take the unlink branch.
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink()
def current_commit_short_sha(repo_root: Path) -> str:
    """Return the abbreviated SHA of ``HEAD`` in ``repo_root``.

    Falls back to the label ``"working-tree"`` when the git command exits
    with a non-zero status.
    """
    rev_parse = ["rev-parse", "--short", "HEAD"]
    try:
        label = run_git(rev_parse, cwd=repo_root, capture_output=True)
    except subprocess.CalledProcessError:
        label = "working-tree"
    return label
def build_remote_url(space_repo: str, username: str, token: str) -> str:
    """Construct the authenticated HF Space git URL.

    The username and token are percent-encoded so that characters with a
    special meaning in the URL userinfo part (``@``, ``:``, ``/`` ...) cannot
    produce a malformed URL. Values made only of letters, digits and
    ``_.-~`` are emitted unchanged.

    Args:
        space_repo: Space identifier placed after ``/spaces/``.
        username: Username for the userinfo part of the URL.
        token: Access token used as the password.

    Returns:
        An ``https://user:token@huggingface.co/spaces/<space_repo>`` URL.
    """
    from urllib.parse import quote  # local import: not in the module's import block

    safe_user = quote(username, safe="")
    safe_token = quote(token, safe="")
    return f"https://{safe_user}:{safe_token}@huggingface.co/spaces/{space_repo}"
def sync_space(
    repo_root: Path,
    config_path: Path,
    token: str,
    username: str,
    commit_message: str | None,
    dry_run: bool,
) -> None:
    """Clone the HF Space repo, replace its contents with the allowlisted files, and push.

    Args:
        repo_root: Source repository the allowlisted files are read from.
        config_path: JSON config providing ``space_repo``, ``include`` and,
            optionally, ``generated``.
        token: Access token embedded in the clone URL.
        username: Username embedded in the clone URL.
        commit_message: Commit message for the Space repo; when empty or None
            a message naming the source commit is used.
        dry_run: When true, print the pending ``git status`` output and stop
            before committing or pushing.

    Raises:
        ValueError: If the config defines no include patterns.
    """
    settings = load_config(config_path)
    space_repo = str(settings["space_repo"])
    patterns = list(map(str, settings.get("include", [])))
    generated_rules = dict(settings.get("generated", {}))
    if not patterns:
        raise ValueError("Config must define at least one include pattern")

    allowlisted = resolve_included_files(repo_root, patterns)
    source_sha = current_commit_short_sha(repo_root)
    message = commit_message if commit_message else f"Sync Space files from {source_sha}"
    remote_url = build_remote_url(space_repo=space_repo, username=username, token=token)

    with tempfile.TemporaryDirectory(prefix="hf-space-sync-") as scratch:
        checkout = Path(scratch)
        run_git(["clone", remote_url, str(checkout)], cwd=repo_root)

        # Rebuild the working tree from scratch so files dropped from the
        # allowlist show up as deletions.
        clear_target_tree(checkout)
        copy_allowlisted_files(
            repo_root=repo_root,
            target_root=checkout,
            files=allowlisted,
            generated=generated_rules,
        )

        pending = run_git(["status", "--porcelain"], cwd=checkout, capture_output=True)
        if not pending:
            print(f"No Hugging Face Space changes to push for {space_repo}.")
            return

        print(f"Prepared {len(allowlisted)} file(s) for Space sync to {space_repo}.")
        if dry_run:
            print("Dry run enabled; skipping commit and push.")
            print(pending)
            return

        publish_steps = (
            ["config", "user.name", "github-actions[bot]"],
            ["config", "user.email", "41898282+github-actions[bot]@users.noreply.github.com"],
            ["add", "--all"],
            ["commit", "-m", message],
            ["push", "origin", "HEAD:main"],
        )
        for step in publish_steps:
            run_git(step, cwd=checkout)
        print(f"Pushed Space update to https://huggingface.co/spaces/{space_repo}")
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Defaults for ``--token`` and ``--username`` are taken from the environment
    (``HF_TOKEN`` / ``HUGGINGFACE_HUB_TOKEN`` and ``HF_USERNAME``).
    """
    env = os.environ
    token_default = env.get("HF_TOKEN") or env.get("HUGGINGFACE_HUB_TOKEN") or ""
    username_default = env.get("HF_USERNAME") or "hf-token"

    cli = argparse.ArgumentParser(description="Sync allowlisted files to a Hugging Face Space repo")
    cli.add_argument(
        "--config",
        type=Path,
        default=DEFAULT_CONFIG_PATH,
        help=f"Path to the Space sync config (default: {DEFAULT_CONFIG_PATH})",
    )
    cli.add_argument(
        "--token",
        default=token_default,
        help="Hugging Face write token. Defaults to HF_TOKEN or HUGGINGFACE_HUB_TOKEN.",
    )
    cli.add_argument(
        "--username",
        default=username_default,
        help="Username to embed in the authenticated git URL (default: HF_USERNAME or hf-token).",
    )
    cli.add_argument(
        "--commit-message",
        default=None,
        help="Optional custom commit message for the Space repo.",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Prepare the target tree and print the pending git status without pushing.",
    )
    return cli.parse_args()
def main() -> int:
    """Run the sync from the command line.

    Returns:
        ``0`` on success; ``1`` when no token was supplied or the sync raised,
        after printing the reason to stderr.
    """
    cli_args = parse_args()
    if not cli_args.token:
        print("Missing Hugging Face token. Set HF_TOKEN or pass --token.", file=sys.stderr)
        return 1

    exit_code = 0
    try:
        sync_space(
            repo_root=REPO_ROOT,
            config_path=cli_args.config.resolve(),
            token=cli_args.token,
            username=cli_args.username,
            commit_message=cli_args.commit_message,
            dry_run=cli_args.dry_run,
        )
    except Exception as failure:
        # Top-level boundary: report any failure and turn it into an exit status.
        print(f"Space sync failed: {type(failure).__name__}: {failure}", file=sys.stderr)
        exit_code = 1
    return exit_code
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())