Spaces:
Running
Running
File size: 1,814 Bytes
"""Publish captured ScrubData agent traces to a Hugging Face Hub dataset.

Earns the "Sharing is Caring / Open trace" bonus quest. Run this after you have
exercised the app, so that data/traces/scrubdata-traces.jsonl contains records.

    huggingface-cli login             # one-time (needs HF_TOKEN with write access)
    uv run scripts/publish_traces.py  # uploads to build-small-hackathon/scrubdata-traces

The default repo is under the hackathon org; override it with --repo to publish
to your own.
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
# Hub dataset repo id used when --repo is not given on the command line.
DEFAULT_REPO: str = "build-small-hackathon/scrubdata-traces"
# Local JSONL trace file used when --file is not given on the command line.
DEFAULT_FILE: str = "data/traces/scrubdata-traces.jsonl"
def main(argv: list[str] | None = None) -> int:
    """Upload the local JSONL trace file to a Hugging Face Hub dataset repo.

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) lets argparse read ``sys.argv[1:]``, so
            calling ``main()`` with no arguments behaves as before.

    Returns:
        ``0`` after the upload call completes; ``1`` if the trace file is
        missing, is not a regular file, or is empty, or if ``huggingface_hub``
        cannot be imported.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--repo", default=DEFAULT_REPO, help="HF dataset repo id")
    ap.add_argument("--file", default=DEFAULT_FILE, help="local JSONL of traces")
    ap.add_argument(
        "--path-in-repo",
        default="scrubdata-traces.jsonl",
        help="destination path of the file inside the dataset repo",
    )
    args = ap.parse_args(argv)

    src = Path(args.file)
    # is_file() rather than exists(): a directory would pass an existence check
    # and then fail inside the upload call instead of printing this message.
    if not src.is_file() or src.stat().st_size == 0:
        print(f"No traces at {src}. Run the app first to generate traces.", file=sys.stderr)
        return 1

    # Imported lazily so a missing dependency yields a hint instead of a traceback.
    try:
        from huggingface_hub import HfApi
    except ImportError:
        print("huggingface_hub not installed. `uv add huggingface_hub`.", file=sys.stderr)
        return 1

    api = HfApi()
    # exist_ok=True: re-running against an already-created repo is not an error.
    api.create_repo(args.repo, repo_type="dataset", exist_ok=True)
    api.upload_file(
        path_or_fileobj=str(src),
        path_in_repo=args.path_in_repo,
        repo_id=args.repo,
        repo_type="dataset",
        commit_message="Update ScrubData agent traces",
    )
    print(f"Uploaded {src} → https://huggingface.co/datasets/{args.repo}")
    return 0
if __name__ == "__main__":
    # Script entry point: main()'s integer result becomes the process exit status.
    sys.exit(main())
|