Spaces:
Sleeping
Sleeping
| """One-shot uploader for ground-truth CSVs to the companion HF dataset repo. | |
| Creates the dataset repo (private by default) if it doesn't exist, then | |
| uploads every <task>.csv from --gt-dir to gt/<task>.csv in the repo. | |
| Usage (run locally with a token that has write scope on the namespace): | |
| HF_TOKEN=hf_xxx python server/space/push_gt.py \\ | |
| --repo lanczos/graphtestbed-gt \\ | |
| --gt-dir ~/graphtestbed-gt | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from huggingface_hub import create_repo, upload_file | |
| def main() -> int: | |
| ap = argparse.ArgumentParser(prog="push_gt") | |
| ap.add_argument("--repo", default="lanczos/graphtestbed-gt", | |
| help="dataset repo id (default: lanczos/graphtestbed-gt)") | |
| ap.add_argument("--gt-dir", type=Path, required=True, | |
| help="local dir containing <task>.csv files") | |
| ap.add_argument("--public", action="store_true", | |
| help="create the repo as public (default: private)") | |
| args = ap.parse_args() | |
| token = os.environ.get("HF_TOKEN") | |
| if not token: | |
| sys.exit("HF_TOKEN not set in env") | |
| if not args.gt_dir.exists(): | |
| sys.exit(f"--gt-dir not found: {args.gt_dir}") | |
| csvs = sorted(args.gt_dir.glob("*.csv")) | |
| if not csvs: | |
| sys.exit(f"no *.csv files under {args.gt_dir}") | |
| print(f"creating/confirming dataset repo {args.repo} (private={not args.public})") | |
| create_repo( | |
| repo_id=args.repo, repo_type="dataset", | |
| private=not args.public, exist_ok=True, token=token, | |
| ) | |
| for csv in csvs: | |
| rel = f"gt/{csv.name}" | |
| print(f"uploading {csv} → {args.repo}:{rel}") | |
| upload_file( | |
| path_or_fileobj=str(csv), | |
| path_in_repo=rel, | |
| repo_id=args.repo, repo_type="dataset", | |
| token=token, | |
| commit_message=f"upload {csv.name}", | |
| ) | |
| print(f"\ndone — {len(csvs)} ground-truth file(s) at:") | |
| print(f" https://huggingface.co/datasets/{args.repo}") | |
| return 0 | |
# Script entry point: run the uploader and hand its return code to the shell.
if __name__ == "__main__":
    sys.exit(main())