Spaces:
Sleeping
Sleeping
"""
Sync the local index directory with the team-0/ragstudio HF bucket.

Usage:
    python sync.py          # push index_data/ -> hf://buckets/team-0/ragstudio
    python sync.py --pull   # pull hf://buckets/team-0/ragstudio -> index_data/

Extra flags (e.g. --delete, --dry-run) are forwarded to `hf sync`.
"""
| import argparse | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
| from indexers import INDEX_DIR | |
# Remote bucket that the local index directory is synced with.
BUCKET = "hf://buckets/team-0/ragstudio"

# Use the `hf` executable that sits beside the running interpreter instead of
# whichever `hf` comes first on PATH; an older one may lack the `sync`
# subcommand.
HF_BIN = str(Path(sys.executable).parent.joinpath("hf"))
def _run(cmd: list[str]) -> None:
    """Echo *cmd* to stdout, then execute it.

    Args:
        cmd: Program and arguments, handed to ``subprocess.run`` as a list
            (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    import shlex

    # shlex.join quotes arguments containing spaces or shell metacharacters,
    # so the echoed line is unambiguous and can be pasted back into a shell.
    # Flush so the echo is emitted before the child's own output even when
    # stdout is a pipe or file and therefore block-buffered.
    print(shlex.join(cmd), flush=True)
    subprocess.run(cmd, check=True)
def sync(pull: bool = False, extra: list[str] | None = None) -> None:
    """Sync the index directory with the HF bucket using ``hf sync``.

    Args:
        pull: When true, copy bucket -> ``INDEX_DIR``; otherwise copy
            ``INDEX_DIR`` -> bucket (push).
        extra: Additional command-line flags appended verbatim to every
            ``hf sync`` invocation made here.

    Raises:
        subprocess.CalledProcessError: If an ``hf sync`` invocation exits
            with a non-zero status (propagated from ``_run``).
    """
    # parents=True so a nested INDEX_DIR can be created on first use.
    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    local = str(INDEX_DIR)
    extra = extra or []

    src, dst = (BUCKET, local) if pull else (local, BUCKET)
    _run([HF_BIN, "sync", src, dst, *extra])

    # On push, also upload the source files that were indexed. The folder is
    # recorded in _source.txt by build_index(). Pull doesn't need a second call:
    # files pushed to <bucket>/source/ come back under <INDEX_DIR>/source/ as
    # part of the main bucket->INDEX_DIR sync above.
    if pull:
        return

    manifest = INDEX_DIR / "_source.txt"
    if not manifest.exists():
        return

    recorded = manifest.read_text().strip()
    if not recorded:
        # Path("") is the current directory, which would pass the is_dir()
        # check below and push the whole working directory to the bucket.
        print("skipping source push: _source.txt is empty")
        return

    source = Path(recorded)
    if not source.is_dir():
        print(f"skipping source push: {source} not found")
        return

    _run([HF_BIN, "sync", str(source), f"{BUCKET}/source", *extra])
def main() -> None:
    """Parse the command line and run the sync.

    Only ``--pull`` is interpreted here; every unrecognised argument is
    collected and passed through to ``sync`` as extra flags.
    """
    cli = argparse.ArgumentParser(description=__doc__.strip())
    cli.add_argument(
        "--pull",
        help="pull from bucket to local (default is push)",
        action="store_true",
    )

    # parse_known_args leaves unrecognised flags in a separate list instead
    # of erroring out, which is what lets them be forwarded.
    known, passthrough = cli.parse_known_args()
    sync(pull=known.pull, extra=passthrough)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()