Spaces:
Running on Zero
Running on Zero
"""
Publish a built index folder to Hugging Face Datasets.

Example:
    python scripts/publish_index_to_hf.py \
        --repo ZhangNy/radiology-index-qwen3-embedding-0.6b \
        --folder ./index_out \
        --token $HF_TOKEN
"""
| from __future__ import annotations | |
| import argparse | |
| import sys | |
| from pathlib import Path | |
| # Allow running as `python scripts/*.py` without installing the package. | |
| sys.path.append(str(Path(__file__).resolve().parents[1])) | |
def _parse_ignore_patterns(raw: str | None) -> list[str] | None:
    """Turn a comma-separated pattern string into a list, or None when it is empty."""
    patterns = [piece.strip() for piece in (raw or "").split(",")]
    patterns = [piece for piece in patterns if piece]
    return patterns or None


def main() -> int:
    """Upload a locally built index folder to a Hugging Face dataset repo.

    Parses the command line, resolves the access token (``--token`` first,
    then the ``HF_TOKEN`` / ``HUGGINGFACE_TOKEN`` environment variables),
    creates the dataset repo if it does not exist yet, and uploads the
    folder contents to the repo root on the requested revision.

    Returns:
        0 after a successful upload, suitable as a process exit code.

    Raises:
        SystemExit: if no token is available, or if ``--folder`` does not
            exist or is not a directory.
    """
    parser = argparse.ArgumentParser(description="Upload index artifacts to HF datasets repo")
    parser.add_argument("--repo", type=str, required=True, help="HF dataset repo id, e.g. user/my-index")
    parser.add_argument("--folder", type=str, required=True, help="Local folder containing chroma_db/ + doc_store.db")
    parser.add_argument("--token", type=str, default=None, help="HF token (or set HF_TOKEN env)")
    parser.add_argument("--private", action="store_true", help="Create repo as private")
    parser.add_argument("--revision", type=str, default="main", help="Target revision/branch")
    parser.add_argument(
        "--ignore",
        type=str,
        default="",
        help="Comma-separated ignore patterns for upload_folder (e.g. 'images/**,**/images/**')",
    )
    args = parser.parse_args()

    # Validate all local inputs before importing huggingface_hub, so a bad
    # invocation reports the actual problem instead of an import failure.
    import os

    # An empty --token (e.g. an unset shell variable) falls through to the env.
    token = args.token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        raise SystemExit("Missing token. Provide --token or set HF_TOKEN.")

    folder = Path(args.folder)
    if not folder.exists():
        raise SystemExit(f"Folder not found: {folder}")
    # upload_folder needs a directory; reject a regular file up front.
    if not folder.is_dir():
        raise SystemExit(f"Folder is not a directory: {folder}")

    from huggingface_hub import HfApi

    api = HfApi()
    # exist_ok=True makes re-publishing to an existing repo a no-op here.
    api.create_repo(
        repo_id=args.repo,
        repo_type="dataset",
        private=args.private,
        exist_ok=True,
        token=token,
    )
    api.upload_folder(
        repo_id=args.repo,
        repo_type="dataset",
        folder_path=str(folder),
        path_in_repo="",
        token=token,
        revision=args.revision,
        commit_message="Upload prebuilt radiology RAG index",
        ignore_patterns=_parse_ignore_patterns(args.ignore),
    )
    print(f"✓ Uploaded index folder to HF dataset repo: {args.repo}")
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())