File size: 7,955 Bytes
82d84b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python3
"""Sync the allowlisted repo files into a Hugging Face Space git repo."""

from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any
from urllib.parse import quote


# Repository root: the parent of the directory that contains this script
# (``parents[1]`` of the resolved file path).
REPO_ROOT = Path(__file__).resolve().parents[1]
# Config file used when ``--config`` is not supplied on the command line.
DEFAULT_CONFIG_PATH = REPO_ROOT / ".hf-space-sync.json"


def _redact_url_credentials(arg: str) -> str:
    """Mask the ``user:password@`` part of any URL embedded in *arg*."""
    return re.sub(r"(?<=://)[^/@\s]+@", "***@", arg)


def run_git(args: list[str], cwd: Path, capture_output: bool = False) -> str:
    """Run a git command and raise on failure.

    Args:
        args: Arguments passed to ``git`` (without the leading ``git``).
        cwd: Directory the command is executed in.
        capture_output: When true, stdout/stderr are captured instead of being
            inherited from this process.

    Returns:
        The stripped stdout when ``capture_output`` is true, otherwise ``""``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
            Credentials embedded in URL arguments are masked in the reported
            command so they cannot leak through error messages or logs.
    """
    try:
        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except subprocess.CalledProcessError as exc:
        # str(CalledProcessError) embeds the full command line, which may hold
        # an authenticated remote URL. Re-raise the same exception type with
        # the secrets masked; ``from None`` drops the unredacted original from
        # the traceback chain.
        raise subprocess.CalledProcessError(
            exc.returncode,
            [_redact_url_credentials(str(part)) for part in exc.cmd],
            output=exc.output,
            stderr=exc.stderr,
        ) from None
    return result.stdout.strip() if capture_output else ""


def load_config(path: Path) -> dict[str, Any]:
    """Read the UTF-8 JSON file at *path* and return the decoded sync settings."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)


# Plain scalars that YAML parsers may re-type as booleans or null.
_YAML_RESERVED_WORDS = frozenset({"true", "false", "yes", "no", "on", "off", "null", "~"})


def _needs_yaml_quotes(text: str) -> bool:
    """Return True when *text* would be misread if emitted as a plain YAML scalar."""
    if not text or text != text.strip():
        return True
    # ":" and "#" can start a mapping value or a comment; control characters
    # (newlines, tabs, ...) cannot appear in a single-line plain scalar.
    if any(ch in ":#" or ord(ch) < 32 for ch in text):
        return True
    if text[0] in "!&*-?[]{}|>@`\"'%,":
        return True
    if text.lower() in _YAML_RESERVED_WORDS:
        return True
    # Strings that look numeric (e.g. "3.10", "0x1F") would be loaded as numbers.
    for parse in (float, lambda raw: int(raw, 0)):
        try:
            parse(text)
        except ValueError:
            continue
        return True
    return False


def _render_yaml_value(value: Any) -> str:
    """Render one front matter value as a single-line YAML node."""
    if value is None:
        return "null"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (list, tuple, dict)):
        # JSON collections are valid YAML flow sequences/mappings.
        return json.dumps(value, ensure_ascii=False)
    text = str(value)
    if _needs_yaml_quotes(text):
        # A JSON string literal is a valid YAML double-quoted scalar and
        # escapes embedded quotes, backslashes and control characters.
        return json.dumps(text, ensure_ascii=False)
    return text


def render_front_matter(values: dict[str, Any]) -> str:
    """Render a minimal YAML front matter block.

    Args:
        values: Mapping of front matter keys to values. Booleans, numbers,
            ``None``, strings and JSON-style lists/dicts are supported; any
            other object is rendered through ``str()``.

    Returns:
        The block delimited by ``---`` lines and terminated by a newline, one
        ``key: value`` line per entry in insertion order. Strings are emitted
        plain when that is unambiguous and double-quoted (with escaping)
        otherwise, so values such as ``"3.10"`` or ``"true"`` stay strings.
    """
    lines = ["---"]
    lines.extend(f"{key}: {_render_yaml_value(value)}" for key, value in values.items())
    lines.append("---")
    lines.append("")
    return "\n".join(lines)


def resolve_included_files(repo_root: Path, patterns: list[str]) -> list[Path]:
    """Expand the include globs under *repo_root* into a deduplicated, ordered list.

    Every pattern must match at least one regular file; otherwise a
    ``FileNotFoundError`` naming all unmatched patterns is raised. The result
    holds resolved paths sorted by their POSIX path relative to *repo_root*.
    """
    collected: set[Path] = set()
    unmatched: list[str] = []

    for glob_pattern in patterns:
        hits = {
            candidate.resolve()
            for candidate in repo_root.glob(glob_pattern)
            if candidate.is_file()
        }
        if hits:
            collected |= hits
        else:
            unmatched.append(glob_pattern)

    if unmatched:
        joined = ", ".join(unmatched)
        raise FileNotFoundError(f"No files matched include pattern(s): {joined}")

    def relative_key(path: Path) -> str:
        # Sort on the repo-relative POSIX form so ordering is platform independent.
        return path.relative_to(repo_root).as_posix()

    return sorted(collected, key=relative_key)


def build_readme_content(source: Path, generated_cfg: dict[str, Any]) -> str:
    """Return the README text, with generated front matter prepended when needed.

    The front matter from ``generated_cfg["prepend_front_matter"]`` is added
    only when that entry is non-empty and the source text does not already
    begin with a ``---`` line; in every other case the text is returned as is.
    """
    body = source.read_text(encoding="utf-8")
    front_matter_values = generated_cfg.get("prepend_front_matter")
    if front_matter_values:
        header = render_front_matter(front_matter_values)
        if not body.startswith("---\n"):
            return header + body
    return body


def copy_allowlisted_files(
    repo_root: Path,
    target_root: Path,
    files: list[Path],
    generated: dict[str, Any],
) -> None:
    """Mirror each allowlisted file into *target_root* at its repo-relative path.

    Files are copied with ``shutil.copy2``. The exception is a ``README.md``
    (name compared case-insensitively) that has a non-empty entry in
    *generated* keyed by its POSIX relative path: its content is produced by
    ``build_readme_content`` and written as UTF-8 text instead.
    """
    for src_path in files:
        rel_path = src_path.relative_to(repo_root)
        dest_path = target_root / rel_path
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        readme_rules = generated.get(rel_path.as_posix())
        is_generated_readme = bool(readme_rules) and rel_path.name.lower() == "readme.md"
        if not is_generated_readme:
            shutil.copy2(src_path, dest_path)
            continue
        dest_path.write_text(build_readme_content(src_path, readme_rules), encoding="utf-8")


def clear_target_tree(target_root: Path) -> None:
    """Remove every entry directly under *target_root* except ``.git``.

    Real directories are deleted recursively; regular files and symbolic
    links (including links that point at directories) are unlinked, so the
    link target itself is never touched.
    """
    for child in target_root.iterdir():
        if child.name == ".git":
            continue
        # is_dir() follows symlinks, and shutil.rmtree refuses to operate on a
        # symlink, so links to directories must take the unlink() path.
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink()


def current_commit_short_sha(repo_root: Path) -> str:
    """Return the abbreviated HEAD commit of *repo_root*.

    Falls back to the label ``"working-tree"`` when ``git rev-parse`` exits
    with a non-zero status.
    """
    try:
        label = run_git(["rev-parse", "--short", "HEAD"], cwd=repo_root, capture_output=True)
    except subprocess.CalledProcessError:
        label = "working-tree"
    return label


def build_remote_url(space_repo: str, username: str, token: str) -> str:
    """Construct the authenticated HF Space git URL.

    Args:
        space_repo: Space identifier placed after ``/spaces/`` in the URL.
        username: User name for the URL's userinfo component.
        token: Access token used as the password.

    Returns:
        An ``https://user:token@huggingface.co/spaces/<space_repo>`` URL. The
        user name and token are percent-encoded so that reserved characters
        such as ``@``, ``:`` or ``/`` cannot corrupt the URL structure.
    """
    # safe="" so that "/" is escaped as well (quote() leaves it alone by default).
    user = quote(username, safe="")
    secret = quote(token, safe="")
    return f"https://{user}:{secret}@huggingface.co/spaces/{space_repo}"


def sync_space(
    repo_root: Path,
    config_path: Path,
    token: str,
    username: str,
    commit_message: str | None,
    dry_run: bool,
) -> None:
    """Mirror the allowlisted repo files into the Space repo and push the result.

    The Space named by the config's ``space_repo`` key is cloned into a
    temporary directory, emptied (except ``.git``), and repopulated from the
    files matched by the config's ``include`` patterns. When git reports no
    pending changes nothing is pushed. With *dry_run* set, the pending status
    is printed and the commit/push steps are skipped.

    Raises:
        ValueError: If the config defines no include patterns.
    """
    settings = load_config(config_path)
    space_repo = str(settings["space_repo"])
    include_patterns = [str(entry) for entry in settings.get("include", [])]
    generated = dict(settings.get("generated", {}))

    if not include_patterns:
        raise ValueError("Config must define at least one include pattern")

    allowlisted = resolve_included_files(repo_root, include_patterns)
    source_sha = current_commit_short_sha(repo_root)
    message = commit_message if commit_message else f"Sync Space files from {source_sha}"
    remote_url = build_remote_url(space_repo=space_repo, username=username, token=token)

    with tempfile.TemporaryDirectory(prefix="hf-space-sync-") as scratch:
        checkout = Path(scratch)
        run_git(["clone", remote_url, str(checkout)], cwd=repo_root)
        clear_target_tree(checkout)
        copy_allowlisted_files(
            repo_root=repo_root,
            target_root=checkout,
            files=allowlisted,
            generated=generated,
        )

        pending = run_git(["status", "--porcelain"], cwd=checkout, capture_output=True)
        if not pending:
            print(f"No Hugging Face Space changes to push for {space_repo}.")
            return

        print(f"Prepared {len(allowlisted)} file(s) for Space sync to {space_repo}.")
        if dry_run:
            print("Dry run enabled; skipping commit and push.")
            print(pending)
            return

        # Set the commit identity, then stage, commit and push, in that order.
        publish_steps = (
            ["config", "user.name", "github-actions[bot]"],
            ["config", "user.email", "41898282+github-actions[bot]@users.noreply.github.com"],
            ["add", "--all"],
            ["commit", "-m", message],
            ["push", "origin", "HEAD:main"],
        )
        for git_args in publish_steps:
            run_git(git_args, cwd=checkout)
        print(f"Pushed Space update to https://huggingface.co/spaces/{space_repo}")


def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    The token default comes from ``HF_TOKEN`` or ``HUGGINGFACE_HUB_TOKEN``
    (empty string when neither is set) and the user name default from
    ``HF_USERNAME`` (``"hf-token"`` when unset).
    """
    env_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
    env_username = os.environ.get("HF_USERNAME") or "hf-token"

    cli = argparse.ArgumentParser(description="Sync allowlisted files to a Hugging Face Space repo")
    cli.add_argument(
        "--config",
        type=Path,
        default=DEFAULT_CONFIG_PATH,
        help=f"Path to the Space sync config (default: {DEFAULT_CONFIG_PATH})",
    )
    cli.add_argument(
        "--token",
        default=env_token,
        help="Hugging Face write token. Defaults to HF_TOKEN or HUGGINGFACE_HUB_TOKEN.",
    )
    cli.add_argument(
        "--username",
        default=env_username,
        help="Username to embed in the authenticated git URL (default: HF_USERNAME or hf-token).",
    )
    cli.add_argument(
        "--commit-message",
        default=None,
        help="Optional custom commit message for the Space repo.",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Prepare the target tree and print the pending git status without pushing.",
    )
    return cli.parse_args()


def main() -> int:
    """Run the sync from the command line and return a process exit status.

    Returns 1 when no token is available or when the sync raises any
    exception (a one-line summary is written to stderr); returns 0 otherwise.
    """
    cli_args = parse_args()
    if not cli_args.token:
        print("Missing Hugging Face token. Set HF_TOKEN or pass --token.", file=sys.stderr)
        return 1

    exit_status = 0
    try:
        sync_space(
            repo_root=REPO_ROOT,
            config_path=cli_args.config.resolve(),
            token=cli_args.token,
            username=cli_args.username,
            commit_message=cli_args.commit_message,
            dry_run=cli_args.dry_run,
        )
    except Exception as error:
        # Top-level boundary: report the failure instead of showing a traceback.
        print(f"Space sync failed: {type(error).__name__}: {error}", file=sys.stderr)
        exit_status = 1
    return exit_status


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())