File size: 1,498 Bytes
dbf7313
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations

from collections.abc import Callable
from pathlib import Path
from typing import Protocol, cast

from huggingface_hub import HfApi


class HubApiLike(Protocol):
    """Structural type for the Hub client methods this module calls.

    Only ``create_repo`` and ``upload_folder`` are declared. Any object
    providing both with compatible signatures can stand in for the real
    client.
    """

    def create_repo(
        self,
        repo_id: str,
        *,
        repo_type: str,
        private: bool,
        exist_ok: bool,
    ) -> None:
        """Create the repo ``repo_id`` of kind ``repo_type``.

        ``private`` and ``exist_ok`` are forwarded verbatim by the caller;
        their exact semantics are defined by the concrete client.
        """

    def upload_folder(
        self,
        *,
        repo_id: str,
        folder_path: Path,
        path_in_repo: str,
        repo_type: str,
        commit_message: str,
    ) -> None:
        """Upload ``folder_path`` into ``repo_id`` at ``path_in_repo``.

        All arguments are keyword-only; ``commit_message`` labels the
        resulting commit.
        """


def publish_dataset_snapshot(
    snapshot_dir: Path,
    hf_repo_id: str,
    *,
    private: bool,
    log: Callable[[str], None] | None = None,
) -> None:
    """Publish ``snapshot_dir`` to the Hub dataset repo ``hf_repo_id``.

    Builds a default ``HfApi`` client and delegates all work to
    ``_publish_dataset_snapshot_api``.

    Args:
        snapshot_dir: Local directory to upload.
        hf_repo_id: Identifier of the target Hub repo.
        private: Forwarded to repo creation.
        log: Optional callback that receives progress messages.
    """
    # ``cast`` only informs the type checker; the client object is unchanged.
    client = cast("HubApiLike", HfApi())
    _publish_dataset_snapshot_api(client, snapshot_dir, hf_repo_id, private, log=log)


def _publish_dataset_snapshot_api(
    api: HubApiLike,
    snapshot_dir: Path,
    hf_repo_id: str,
    private: bool,
    log: Callable[[str], None] | None = None,
) -> None:
    if log:
        log(f"Ensuring Hub dataset repo exists: {hf_repo_id}")
    api.create_repo(hf_repo_id, repo_type="dataset", private=private, exist_ok=True)
    if log:
        log(f"Uploading snapshot to Hub: {snapshot_dir}")
    api.upload_folder(
        repo_id=hf_repo_id,
        folder_path=snapshot_dir,
        path_in_repo=".",
        repo_type="dataset",
        commit_message=f"Add snapshot {snapshot_dir.name}",
    )
    if log:
        log(f"Upload finished: {hf_repo_id}")