Spaces:
Sleeping
Sleeping
File size: 1,978 Bytes
15d3835 520cc96 15d3835 520cc96 15d3835 520cc96 15d3835 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | """
Sync the local index directory with the team-0/ragstudio HF bucket.
Usage:
python sync.py # push index_data/ -> hf://buckets/team-0/ragstudio
python sync.py --pull # pull hf://buckets/team-0/ragstudio -> index_data/
Extra flags (e.g. --delete, --dry-run) are forwarded to `hf sync`.
"""
import argparse
import shlex
import subprocess
import sys
from pathlib import Path

from indexers import INDEX_DIR
# HF bucket that the local index directory is synced with.
BUCKET = "hf://buckets/team-0/ragstudio"

# Use the `hf` executable that sits beside the running interpreter; an `hf`
# found via PATH may be an older release without the `sync` subcommand.
HF_BIN = str(Path(sys.executable).parent.joinpath("hf"))
def _run(cmd: list[str]) -> None:
    """Echo *cmd* to stdout, then execute it.

    Args:
        cmd: Program and arguments, handed to ``subprocess.run`` as a list
            (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # shlex.join quotes arguments containing spaces or shell metacharacters,
    # so the echoed line is unambiguous and can be pasted back into a shell.
    # Flushing makes the echo appear before the child's own output even when
    # stdout is a pipe or a file rather than a terminal.
    print(shlex.join(cmd), flush=True)
    subprocess.run(cmd, check=True)
def sync(pull: bool = False, extra: list[str] | None = None) -> None:
    """Sync the local index directory with the bucket via ``hf sync``.

    Args:
        pull: When true, copy BUCKET -> INDEX_DIR; otherwise copy
            INDEX_DIR -> BUCKET.
        extra: Additional command-line flags appended to every ``hf sync``
            invocation. ``None`` means no extra flags.

    Raises:
        subprocess.CalledProcessError: If an ``hf sync`` call exits non-zero.
    """
    # parents=True so a nested INDEX_DIR can be created from scratch.
    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    local = str(INDEX_DIR)
    extra = extra or []

    src, dst = (BUCKET, local) if pull else (local, BUCKET)
    _run([HF_BIN, "sync", src, dst, *extra])

    # On push, also upload the source files that were indexed. The folder is
    # recorded in _source.txt by build_index(). Pull doesn't need a second
    # call: files pushed to <bucket>/source/ come back under
    # <INDEX_DIR>/source/ as part of the main bucket->INDEX_DIR sync above.
    if pull:
        return

    manifest = INDEX_DIR / "_source.txt"
    if not manifest.exists():
        return

    recorded = manifest.read_text().strip()
    if not recorded:
        # Path("") is Path("."), which is always a directory: without this
        # guard an empty manifest would push the current working directory.
        print(f"skipping source push: {manifest} is empty")
        return

    source = Path(recorded)
    if not source.is_dir():
        print(f"skipping source push: {source} not found")
        return

    _run([HF_BIN, "sync", str(source), f"{BUCKET}/source", *extra])
def main() -> None:
    """Parse command-line arguments and run the sync.

    ``--pull`` selects the bucket -> local direction; without it the local
    index is pushed. Arguments argparse does not recognise are collected by
    ``parse_known_args`` and passed to ``sync`` as ``extra``.
    """
    parser = argparse.ArgumentParser(
        # __doc__ is None under ``python -OO``; fall back to an empty string.
        description=(__doc__ or "").strip(),
        # Keep the usage examples from the module docstring on their own
        # lines instead of letting argparse re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--pull",
        action="store_true",
        help="pull from bucket to local (default is push)",
    )
    args, extra = parser.parse_known_args()
    sync(pull=args.pull, extra=extra)


if __name__ == "__main__":
    main()
|