File size: 2,130 Bytes
d094faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""One-shot uploader for ground-truth CSVs to the companion HF dataset repo.

Creates the dataset repo (private by default) if it doesn't exist, then
uploads every <task>.csv from --gt-dir to gt/<task>.csv in the repo.

Usage (run locally with a token that has write scope on the namespace):

    HF_TOKEN=hf_xxx python server/space/push_gt.py \\
        --repo lanczos/graphtestbed-gt \\
        --gt-dir ~/graphtestbed-gt
"""

from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import create_repo, upload_file


def main() -> int:
    """Upload every ground-truth CSV in ``--gt-dir`` to a HF dataset repo.

    Parses the command line (``--repo``, ``--gt-dir``, ``--public``), reads
    the token from the ``HF_TOKEN`` environment variable, creates the dataset
    repo if it does not exist yet (private unless ``--public`` is given), and
    uploads each ``<task>.csv`` to ``gt/<task>.csv`` with one commit per file.

    Returns:
        0 on success.

    Raises:
        SystemExit: with an explanatory message when ``HF_TOKEN`` is unset,
            ``--gt-dir`` is missing or not a directory, or the directory
            contains no ``*.csv`` files.
    """
    ap = argparse.ArgumentParser(prog="push_gt")
    ap.add_argument("--repo", default="lanczos/graphtestbed-gt",
                    help="dataset repo id (default: lanczos/graphtestbed-gt)")
    ap.add_argument("--gt-dir", type=Path, required=True,
                    help="local dir containing <task>.csv files")
    ap.add_argument("--public", action="store_true",
                    help="create the repo as public (default: private)")
    args = ap.parse_args()

    token = os.environ.get("HF_TOKEN")
    if not token:
        sys.exit("HF_TOKEN not set in env")

    # The shell does not expand "~" in forms like --gt-dir=~/x or quoted
    # paths, so expand it here.
    gt_dir = args.gt_dir.expanduser()
    if not gt_dir.exists():
        sys.exit(f"--gt-dir not found: {gt_dir}")
    if not gt_dir.is_dir():
        # A regular file would otherwise fall through to the misleading
        # "no *.csv files" error below.
        sys.exit(f"--gt-dir is not a directory: {gt_dir}")

    # Keep regular files only: a directory named "*.csv" cannot be uploaded
    # with upload_file. Sorting makes the upload order deterministic.
    csv_paths = sorted(p for p in gt_dir.glob("*.csv") if p.is_file())
    if not csv_paths:
        sys.exit(f"no *.csv files under {gt_dir}")

    print(f"creating/confirming dataset repo {args.repo} (private={not args.public})")
    create_repo(
        repo_id=args.repo, repo_type="dataset",
        private=not args.public, exist_ok=True, token=token,
    )

    for csv_path in csv_paths:
        rel = f"gt/{csv_path.name}"
        print(f"uploading {csv_path} -> {args.repo}:{rel}")
        upload_file(
            path_or_fileobj=str(csv_path),
            path_in_repo=rel,
            repo_id=args.repo, repo_type="dataset",
            token=token,
            commit_message=f"upload {csv_path.name}",
        )

    print(f"\ndone — {len(csv_paths)} ground-truth file(s) at:")
    print(f"  https://huggingface.co/datasets/{args.repo}")
    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    sys.exit(main())