Spaces:
Running on Zero
Running on Zero
| """Create and initialize the public Hugging Face trace dataset.""" | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| from pathlib import Path | |
| from huggingface_hub import HfApi | |
| from huggingface_hub.errors import EntryNotFoundError | |
# Absolute location of this script, resolved once so both directory constants
# are derived from the same value.
_SCRIPT_PATH = Path(__file__).resolve()

# One level above the directory containing this script; main() reads the
# dataset card and the seed JSONL file relative to this folder.
TRACE_DIR = _SCRIPT_PATH.parents[1]

# Two levels above the directory containing this script
# (presumably the repository root -- confirm against the project layout).
ROOT = _SCRIPT_PATH.parents[2]

# Dataset repo id used when neither --repo-id nor HF_TRACE_DATASET_REPO is given.
DEFAULT_REPO = "build-small-hackathon/pakistan-notice-helper-traces"
def main() -> int:
    """Create the public trace dataset repo and upload its seed files.

    Command-line options:
        --repo-id: target dataset repository. Defaults to the
            ``HF_TRACE_DATASET_REPO`` environment variable, falling back to
            ``DEFAULT_REPO``.
        --dry-run: print the planned actions and exit without calling the Hub.
        --replace-data: delete the remote ``data`` folder before uploading.

    Returns:
        0 on success (including dry runs). 1 when a local file scheduled for
        upload does not exist; in that case no Hub call is made at all.
    """
    import sys  # local import: only used to report missing files on stderr

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--repo-id",
        default=os.getenv("HF_TRACE_DATASET_REPO", DEFAULT_REPO),
    )
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument(
        "--replace-data",
        action="store_true",
        help="Delete existing data files before uploading the current seed.",
    )
    args = parser.parse_args()

    # Maps each local file to the path it gets inside the dataset repo.
    files = {
        TRACE_DIR / "dataset_card.md": "README.md",
        TRACE_DIR / "data" / "trace_samples.jsonl": "data/seed/trace_samples.jsonl",
    }

    if args.dry_run:
        print(f"Would create public dataset: {args.repo_id}")
        if args.replace_data:
            print("Would delete the existing data/ folder.")
        for local, remote in files.items():
            print(f"Would upload {local} -> {remote}")
        return 0

    # Verify every upload source exists BEFORE touching the remote repo.
    # Otherwise --replace-data could delete the remote data folder and then
    # fail on upload, leaving the dataset without any data.
    missing = [local for local in files if not local.is_file()]
    if missing:
        for local in missing:
            print(f"Missing local file: {local}", file=sys.stderr)
        return 1

    # An empty HF_TOKEN is normalised to None rather than passed as "".
    api = HfApi(token=os.getenv("HF_TOKEN") or None)
    api.create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=False,
        exist_ok=True,
    )

    if args.replace_data:
        try:
            api.delete_folder(
                path_in_repo="data",
                repo_id=args.repo_id,
                repo_type="dataset",
                commit_message="Replace trace data for simplified schema",
            )
        except EntryNotFoundError:
            # Deliberate best-effort: presumably raised when the repo has no
            # data folder yet, in which case there is nothing to delete.
            pass

    # Each file is uploaded with its own commit message.
    for local, remote in files.items():
        api.upload_file(
            path_or_fileobj=str(local),
            path_in_repo=remote,
            repo_id=args.repo_id,
            repo_type="dataset",
            commit_message=f"Add {remote}",
        )

    print(f"Initialized https://huggingface.co/datasets/{args.repo_id}")
    return 0
if __name__ == "__main__":
    # Run the CLI and use main()'s integer return value as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)