from __future__ import annotations from collections.abc import Callable from pathlib import Path from typing import Protocol, cast from huggingface_hub import HfApi class HubApiLike(Protocol): def create_repo( self, repo_id: str, *, repo_type: str, private: bool, exist_ok: bool ) -> None: ... def upload_folder( self, *, repo_id: str, folder_path: Path, path_in_repo: str, repo_type: str, commit_message: str, ) -> None: ... def publish_dataset_snapshot( snapshot_dir: Path, hf_repo_id: str, *, private: bool, log: Callable[[str], None] | None = None, ) -> None: _publish_dataset_snapshot_api( cast("HubApiLike", HfApi()), snapshot_dir, hf_repo_id, private, log=log, ) def _publish_dataset_snapshot_api( api: HubApiLike, snapshot_dir: Path, hf_repo_id: str, private: bool, log: Callable[[str], None] | None = None, ) -> None: if log: log(f"Ensuring Hub dataset repo exists: {hf_repo_id}") api.create_repo(hf_repo_id, repo_type="dataset", private=private, exist_ok=True) if log: log(f"Uploading snapshot to Hub: {snapshot_dir}") api.upload_folder( repo_id=hf_repo_id, folder_path=snapshot_dir, path_in_repo=".", repo_type="dataset", commit_message=f"Add snapshot {snapshot_dir.name}", ) if log: log(f"Upload finished: {hf_repo_id}")