THIRAWAT-mapper-demo / scripts /offline /publish_index_hf.py
na399's picture
Deploy THIRAWAT mapper app
25c66a0 verified
#!/usr/bin/env python3
"""Offline: publish built index artifact to Hugging Face dataset repo."""
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, upload_folder
# parents[0] is the directory holding this script; parents[2] is two levels
# above that and is treated as the repository root.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory uploaded when --source-dir is not given on the command line.
DEFAULT_SOURCE_DIR = REPO_ROOT.joinpath("data", "lancedb")
def parse_args() -> argparse.Namespace:
    """Define the command-line options and parse the process arguments.

    Returns:
        Namespace carrying ``repo_id``, ``source_dir``, ``revision``,
        ``private`` and ``token_env``.
    """
    cli = argparse.ArgumentParser(description="Upload index artifact to Hugging Face dataset repo.")

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        (
            "--repo-id",
            {"required": True, "help": "Target HF dataset repo (e.g. org/name)."},
        ),
        (
            "--source-dir",
            {
                "default": str(DEFAULT_SOURCE_DIR),
                "help": f"Directory to upload (default: {DEFAULT_SOURCE_DIR}).",
            },
        ),
        (
            "--revision",
            {"default": "main", "help": "Target branch/revision (default: main)."},
        ),
        (
            "--private",
            {"action": "store_true", "help": "Create repo as private if it does not exist."},
        ),
        (
            "--token-env",
            {
                "default": "HF_TOKEN",
                "help": "Environment variable name holding HF write token (default: HF_TOKEN).",
            },
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()
def run(args: argparse.Namespace) -> int:
    """Publish a local index directory to a Hugging Face dataset repo.

    Local preconditions (source directory, token) are validated before any
    Hub call is made. The repo is then created if needed, the target branch
    is created when it is not ``main``, and the folder is uploaded.

    Args:
        args: Parsed CLI options; reads ``source_dir``, ``token_env``,
            ``repo_id``, ``private`` and ``revision``.

    Returns:
        ``0`` once the upload call has returned.

    Raises:
        FileNotFoundError: ``source_dir`` does not exist.
        NotADirectoryError: ``source_dir`` exists but is not a directory.
        ValueError: The environment variable named by ``token_env`` is unset
            or empty.
    """
    source_dir = Path(args.source_dir).expanduser().resolve()
    if not source_dir.exists():
        raise FileNotFoundError(f"Source directory does not exist: {source_dir}")
    # A regular file would pass exists(); reject it here, before any Hub call,
    # instead of letting the folder upload fail later.
    if not source_dir.is_dir():
        raise NotADirectoryError(f"Source path is not a directory: {source_dir}")

    token = os.getenv(args.token_env)
    if not token:
        raise ValueError(f"Missing Hugging Face token in environment variable: {args.token_env}")

    create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=bool(args.private),
        token=token,
        exist_ok=True,
    )

    api = HfApi(token=token)
    if args.revision != "main":
        try:
            api.create_branch(repo_id=args.repo_id, repo_type="dataset", branch=args.revision, exist_ok=True)
        except TypeError:
            # Backward-compatible path for clients without exist_ok support.
            try:
                api.create_branch(repo_id=args.repo_id, repo_type="dataset", branch=args.revision)
            except Exception as exc:
                # Still best-effort (the branch may simply exist already), but
                # report the failure instead of hiding it so that a later
                # upload error can be traced back to its cause.
                print(
                    f"Warning: could not create branch {args.revision!r} "
                    f"(it may already exist): {exc}",
                    file=sys.stderr,
                )

    commit_info = upload_folder(
        repo_id=args.repo_id,
        repo_type="dataset",
        folder_path=str(source_dir),
        path_in_repo=".",
        revision=args.revision,
        commit_message="Update THIRAWAT mapper demo index artifact",
        token=token,
    )
    print(f"Uploaded index artifact to {args.repo_id}@{args.revision}")
    print(f"Commit URL: {commit_info.commit_url}")
    return 0
def main() -> int:
    """Parse the command line, run the publish step, and return its status code."""
    return run(parse_args())
if __name__ == "__main__":
    # Hand the value returned by main() to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)