Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Offline: publish built index artifact to Hugging Face dataset repo.""" | |
| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| import os | |
| from huggingface_hub import HfApi, create_repo, upload_folder | |
# Third ancestor directory of this file's resolved path
# (presumably the repository root -- confirm against the project layout).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Folder uploaded when --source-dir is not given on the command line.
DEFAULT_SOURCE_DIR = REPO_ROOT.joinpath("data", "lancedb")
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the publish script.

    Returns:
        Namespace with the attributes ``repo_id``, ``source_dir``,
        ``revision``, ``private`` and ``token_env``.
    """
    # One (flag, add_argument keyword arguments) pair per option. They are
    # registered in this order so the generated --help output stays the same.
    option_specs = (
        (
            "--repo-id",
            {"required": True, "help": "Target HF dataset repo (e.g. org/name)."},
        ),
        (
            "--source-dir",
            {
                "default": str(DEFAULT_SOURCE_DIR),
                "help": f"Directory to upload (default: {DEFAULT_SOURCE_DIR}).",
            },
        ),
        (
            "--revision",
            {"default": "main", "help": "Target branch/revision (default: main)."},
        ),
        (
            "--private",
            {"action": "store_true", "help": "Create repo as private if it does not exist."},
        ),
        (
            "--token-env",
            {
                "default": "HF_TOKEN",
                "help": "Environment variable name holding HF write token (default: HF_TOKEN).",
            },
        ),
    )

    cli = argparse.ArgumentParser(description="Upload index artifact to Hugging Face dataset repo.")
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def _ensure_branch(api: "HfApi", repo_id: str, branch: str) -> None:
    """Create *branch* on the dataset repo if possible, on a best-effort basis."""
    branch_kwargs = {"repo_id": repo_id, "repo_type": "dataset", "branch": branch}
    try:
        api.create_branch(**branch_kwargs, exist_ok=True)
        return
    except TypeError:
        # Backward-compatible path for clients without exist_ok support.
        pass

    try:
        api.create_branch(**branch_kwargs)
    except Exception as err:  # noqa: BLE001 - deliberate best-effort
        # Without exist_ok an already-existing branch is reported as an error,
        # so this must not abort the run. Report it instead of hiding it: an
        # auth or network problem here would otherwise go unnoticed.
        # NOTE(review): the following upload is expected to fail loudly if the
        # branch is genuinely missing -- confirm against the client version in use.
        import sys

        print(
            f"Warning: could not create branch {branch!r} on {repo_id} "
            f"(it may already exist): {err}",
            file=sys.stderr,
        )


def run(args: argparse.Namespace) -> int:
    """Upload the folder named by ``args.source_dir`` to a Hugging Face dataset repo.

    All local validation (source path, token) happens before the first
    network call, so a bad invocation does not create a repo as a side effect.

    Args:
        args: Parsed options; reads ``source_dir``, ``token_env``, ``repo_id``,
            ``private`` and ``revision``.

    Returns:
        ``0`` once the upload has completed.

    Raises:
        FileNotFoundError: ``args.source_dir`` does not exist.
        NotADirectoryError: ``args.source_dir`` exists but is not a directory.
        ValueError: the environment variable named by ``args.token_env`` is
            unset or empty.
    """
    source_dir = Path(args.source_dir).expanduser().resolve()
    if not source_dir.exists():
        raise FileNotFoundError(f"Source directory does not exist: {source_dir}")
    if not source_dir.is_dir():
        # A regular file would otherwise only be rejected after create_repo ran.
        raise NotADirectoryError(f"Source path is not a directory: {source_dir}")

    token = os.getenv(args.token_env)
    if not token:
        raise ValueError(f"Missing Hugging Face token in environment variable: {args.token_env}")

    # exist_ok=True makes this a no-op when the dataset repo is already there.
    create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=bool(args.private),
        token=token,
        exist_ok=True,
    )

    api = HfApi(token=token)
    if args.revision != "main":
        # Only non-default revisions get an explicit branch-creation attempt.
        _ensure_branch(api, args.repo_id, args.revision)

    commit_info = upload_folder(
        repo_id=args.repo_id,
        repo_type="dataset",
        folder_path=str(source_dir),
        path_in_repo=".",
        revision=args.revision,
        commit_message="Update THIRAWAT mapper demo index artifact",
        token=token,
    )
    print(f"Uploaded index artifact to {args.repo_id}@{args.revision}")
    print(f"Commit URL: {commit_info.commit_url}")
    return 0
def main() -> int:
    """Script entry point: parse the CLI options, run the upload, return its status."""
    return run(parse_args())
if __name__ == "__main__":
    # Hand the integer status from main() to the interpreter as the exit code.
    exit_status = main()
    raise SystemExit(exit_status)