"""One-shot uploader for ground-truth CSVs to the companion HF dataset repo. Creates the dataset repo (private by default) if it doesn't exist, then uploads every .csv from --gt-dir to gt/.csv in the repo. Usage (run locally with a token that has write scope on the namespace): HF_TOKEN=hf_xxx python server/space/push_gt.py \\ --repo lanczos/graphtestbed-gt \\ --gt-dir ~/graphtestbed-gt """ from __future__ import annotations import argparse import os import sys from pathlib import Path from huggingface_hub import create_repo, upload_file def main() -> int: ap = argparse.ArgumentParser(prog="push_gt") ap.add_argument("--repo", default="lanczos/graphtestbed-gt", help="dataset repo id (default: lanczos/graphtestbed-gt)") ap.add_argument("--gt-dir", type=Path, required=True, help="local dir containing .csv files") ap.add_argument("--public", action="store_true", help="create the repo as public (default: private)") args = ap.parse_args() token = os.environ.get("HF_TOKEN") if not token: sys.exit("HF_TOKEN not set in env") if not args.gt_dir.exists(): sys.exit(f"--gt-dir not found: {args.gt_dir}") csvs = sorted(args.gt_dir.glob("*.csv")) if not csvs: sys.exit(f"no *.csv files under {args.gt_dir}") print(f"creating/confirming dataset repo {args.repo} (private={not args.public})") create_repo( repo_id=args.repo, repo_type="dataset", private=not args.public, exist_ok=True, token=token, ) for csv in csvs: rel = f"gt/{csv.name}" print(f"uploading {csv} → {args.repo}:{rel}") upload_file( path_or_fileobj=str(csv), path_in_repo=rel, repo_id=args.repo, repo_type="dataset", token=token, commit_message=f"upload {csv.name}", ) print(f"\ndone — {len(csvs)} ground-truth file(s) at:") print(f" https://huggingface.co/datasets/{args.repo}") return 0 if __name__ == "__main__": raise SystemExit(main())