# graphtestbed/server/space/push_gt.py
# Zhu Jiajun (jz28583)
# Add agents/ harness integrations and HF Space scoring deployment
# d094faf
"""One-shot uploader for ground-truth CSVs to the companion HF dataset repo.

Creates the dataset repo (private by default) if it doesn't exist, then
uploads every <task>.csv from --gt-dir to gt/<task>.csv in the repo.

Usage (run locally with a token that has write scope on the namespace):

    HF_TOKEN=hf_xxx python server/space/push_gt.py \\
        --repo lanczos/graphtestbed-gt \\
        --gt-dir ~/graphtestbed-gt
"""
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
from huggingface_hub import create_repo, upload_file
def main() -> int:
    """Upload every ground-truth CSV in ``--gt-dir`` to a HF dataset repo.

    Parses the command line, reads the access token from the ``HF_TOKEN``
    environment variable, calls ``create_repo`` with ``exist_ok=True`` (the
    repo is requested as private unless ``--public`` is given), then uploads
    each ``*.csv`` file found directly in ``--gt-dir`` to ``gt/<name>`` in
    that repo, one ``upload_file`` call (and commit message) per file.

    Returns:
        0 once every file has been uploaded.

    Raises:
        SystemExit: with an explanatory message when ``HF_TOKEN`` is unset,
            when ``--gt-dir`` does not exist or is not a directory, or when
            it contains no ``*.csv`` files. argparse also exits on invalid
            arguments. Errors raised by the ``huggingface_hub`` calls are
            not caught here.
    """
    ap = argparse.ArgumentParser(prog="push_gt")
    ap.add_argument("--repo", default="lanczos/graphtestbed-gt",
                    help="dataset repo id (default: lanczos/graphtestbed-gt)")
    ap.add_argument("--gt-dir", type=Path, required=True,
                    help="local dir containing <task>.csv files")
    ap.add_argument("--public", action="store_true",
                    help="create the repo as public (default: private)")
    args = ap.parse_args()

    token = os.environ.get("HF_TOKEN")
    if not token:
        sys.exit("HF_TOKEN not set in env")

    # Expand "~" here as well: some shells pass it through literally,
    # e.g. when the option is written as --gt-dir=~/gt.
    gt_dir = args.gt_dir.expanduser()
    if not gt_dir.exists():
        sys.exit(f"--gt-dir not found: {gt_dir}")
    if not gt_dir.is_dir():
        # A regular file would otherwise slip through and fail later with a
        # misleading "no *.csv files" message (or a traceback from glob).
        sys.exit(f"--gt-dir is not a directory: {gt_dir}")

    # Keep regular files only; a directory that happens to be named
    # "something.csv" cannot be passed to upload_file.
    csv_paths = sorted(p for p in gt_dir.glob("*.csv") if p.is_file())
    if not csv_paths:
        sys.exit(f"no *.csv files under {gt_dir}")

    print(f"creating/confirming dataset repo {args.repo} (private={not args.public})")
    create_repo(
        repo_id=args.repo, repo_type="dataset",
        private=not args.public, exist_ok=True, token=token,
    )

    for csv_path in csv_paths:
        rel = f"gt/{csv_path.name}"
        # Separator between source and destination; without it the local
        # path and the repo id run together in the log line.
        print(f"uploading {csv_path} -> {args.repo}:{rel}")
        upload_file(
            path_or_fileobj=str(csv_path),
            path_in_repo=rel,
            repo_id=args.repo, repo_type="dataset",
            token=token,
            commit_message=f"upload {csv_path.name}",
        )

    print(f"\ndone — {len(csv_paths)} ground-truth file(s) at:")
    print(f" https://huggingface.co/datasets/{args.repo}")
    return 0
if __name__ == "__main__":
    # Script entry point: pass main()'s return value to the interpreter as
    # the process exit status.
    sys.exit(main())