""" Sync the local index directory with the team-0/ragstudio HF bucket. Usage: python sync.py # push index_data/ -> hf://buckets/team-0/ragstudio python sync.py --pull # pull hf://buckets/team-0/ragstudio -> index_data/ Extra flags (e.g. --delete, --dry-run) are forwarded to `hf sync`. """ import argparse import subprocess import sys from pathlib import Path from indexers import INDEX_DIR BUCKET = "hf://buckets/team-0/ragstudio" # Resolve `hf` next to the active interpreter so we don't accidentally pick # up an older hf from PATH that lacks the `sync` subcommand. HF_BIN = str(Path(sys.executable).parent / "hf") def _run(cmd: list[str]) -> None: print(" ".join(cmd)) subprocess.run(cmd, check=True) def sync(pull: bool = False, extra: list[str] | None = None) -> None: INDEX_DIR.mkdir(exist_ok=True) local = str(INDEX_DIR) extra = extra or [] src, dst = (BUCKET, local) if pull else (local, BUCKET) _run([HF_BIN, "sync", src, dst, *extra]) # On push, also upload the source files that were indexed. The folder is # recorded in _source.txt by build_index(). Pull doesn't need a second call: # files pushed to /source/ come back under /source/ as # part of the main bucket->INDEX_DIR sync above. if pull: return manifest = INDEX_DIR / "_source.txt" if not manifest.exists(): return source = Path(manifest.read_text().strip()) if not source.is_dir(): print(f"skipping source push: {source} not found") return _run([HF_BIN, "sync", str(source), f"{BUCKET}/source", *extra]) def main() -> None: parser = argparse.ArgumentParser(description=__doc__.strip()) parser.add_argument( "--pull", action="store_true", help="pull from bucket to local (default is push)", ) args, extra = parser.parse_known_args() sync(pull=args.pull, extra=extra) if __name__ == "__main__": main()