File size: 1,978 Bytes
15d3835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520cc96
 
 
 
 
15d3835
 
 
520cc96
15d3835
520cc96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15d3835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Sync the local index directory with the team-0/ragstudio HF bucket.

Usage:
    python sync.py            # push index_data/ -> hf://buckets/team-0/ragstudio
    python sync.py --pull     # pull hf://buckets/team-0/ragstudio -> index_data/

Extra flags (e.g. --delete, --dry-run) are forwarded to `hf sync`.
"""

import argparse
import shlex
import subprocess
import sys
from pathlib import Path

from indexers import INDEX_DIR

# Remote endpoint of the sync: used as the source on pull and as the
# destination on push.
BUCKET = "hf://buckets/team-0/ragstudio"

# Resolve `hf` next to the active interpreter so we don't accidentally pick
# up an older hf from PATH that lacks the `sync` subcommand.
# NOTE(review): this builds the path without checking that it exists, and
# assumes the executable is named exactly "hf" -- confirm for environments
# where the launcher lives elsewhere or carries a suffix.
HF_BIN = str(Path(sys.executable).parent / "hf")


def _run(cmd: list[str]) -> None:
    """Echo *cmd* to stdout, then execute it and wait for it to finish.

    Args:
        cmd: Program and arguments, passed to the OS as a list (no shell).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    # shlex.join quotes arguments that contain spaces or shell metacharacters,
    # so the echoed line can be pasted back into a shell unambiguously.
    # flush=True keeps the echo ahead of the child's own output when stdout
    # is a pipe or file (where Python would otherwise buffer the line).
    print(shlex.join(cmd), flush=True)
    subprocess.run(cmd, check=True)


def sync(pull: bool = False, extra: list[str] | None = None) -> None:
    """Sync INDEX_DIR with BUCKET; on push, also upload the indexed sources.

    Args:
        pull: When true, copy BUCKET -> INDEX_DIR. Otherwise copy
            INDEX_DIR -> BUCKET (the default).
        extra: Additional flags appended verbatim to every `hf sync`
            invocation made by this function.

    Raises:
        subprocess.CalledProcessError: If an `hf sync` invocation exits with
            a non-zero status.
    """
    # parents=True so a nested INDEX_DIR can be created from scratch.
    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    local = str(INDEX_DIR)
    extra = extra or []
    src, dst = (BUCKET, local) if pull else (local, BUCKET)
    _run([HF_BIN, "sync", src, dst, *extra])

    # On push, also upload the source files that were indexed. The folder is
    # recorded in _source.txt by build_index(). Pull doesn't need a second call:
    # files pushed to <bucket>/source/ come back under <INDEX_DIR>/source/ as
    # part of the main bucket->INDEX_DIR sync above.
    if pull:
        return
    manifest = INDEX_DIR / "_source.txt"
    if not manifest.exists():
        return
    recorded = manifest.read_text().strip()
    if not recorded:
        # Path("") is Path("."), which *is* a directory: without this guard an
        # empty manifest would upload the whole current working directory.
        print(f"skipping source push: {manifest} is empty")
        return
    source = Path(recorded)
    if not source.is_dir():
        print(f"skipping source push: {source} not found")
        return
    _run([HF_BIN, "sync", str(source), f"{BUCKET}/source", *extra])


def main() -> None:
    parser = argparse.ArgumentParser(description=__doc__.strip())
    parser.add_argument(
        "--pull",
        action="store_true",
        help="pull from bucket to local (default is push)",
    )
    args, extra = parser.parse_known_args()
    sync(pull=args.pull, extra=extra)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()