"""Upload GenSegDataset (processed_unified mirror + dataset card) to the Hugging Face Hub as a PRIVATE dataset repo. Storage is content-addressed, so fold-duplicated images collapse to unique blobs. Resumable via upload_large_folder.""" import sys from huggingface_hub import HfApi REPO = "MaybeRichard/GenSegDataset" DATA = "/home/wzhang/LSC/Dataset/Segmentation/processed_unified" CARD = "/home/wzhang/LSC/Dataset/Segmentation/GenSegDataset_README.md" api = HfApi() api.create_repo(REPO, repo_type="dataset", private=True, exist_ok=True) print("repo ready:", REPO, flush=True) # big data upload (resumable, dedups blobs, parallel) api.upload_large_folder(repo_id=REPO, repo_type="dataset", folder_path=DATA) print("folder uploaded", flush=True) # dataset card last so it is the final README at the repo root api.upload_file(path_or_fileobj=CARD, path_in_repo="README.md", repo_id=REPO, repo_type="dataset", commit_message="add dataset card") print("UPLOAD_DONE", flush=True)