"""Publish captured ScrubData agent traces to a Hugging Face Hub dataset. Earns the "Sharing is Caring / Open trace" bonus quest. Run after you've exercised the app (so data/traces/scrubdata-traces.jsonl has records). huggingface-cli login # one-time (needs HF_TOKEN with write) uv run scripts/publish_traces.py # uploads to build-small-hackathon/scrubdata-traces The default repo is under the hackathon org; override with --repo for your own. """ from __future__ import annotations import argparse import sys from pathlib import Path DEFAULT_REPO = "build-small-hackathon/scrubdata-traces" DEFAULT_FILE = "data/traces/scrubdata-traces.jsonl" def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--repo", default=DEFAULT_REPO, help="HF dataset repo id") ap.add_argument("--file", default=DEFAULT_FILE, help="local JSONL of traces") ap.add_argument("--path-in-repo", default="scrubdata-traces.jsonl") args = ap.parse_args() src = Path(args.file) if not src.exists() or src.stat().st_size == 0: print(f"No traces at {src}. Run the app first to generate traces.", file=sys.stderr) return 1 try: from huggingface_hub import HfApi except ImportError: print("huggingface_hub not installed. `uv add huggingface_hub`.", file=sys.stderr) return 1 api = HfApi() api.create_repo(args.repo, repo_type="dataset", exist_ok=True) api.upload_file( path_or_fileobj=str(src), path_in_repo=args.path_in_repo, repo_id=args.repo, repo_type="dataset", commit_message="Update ScrubData agent traces", ) print(f"Uploaded {src} → https://huggingface.co/datasets/{args.repo}") return 0 if __name__ == "__main__": raise SystemExit(main())