File size: 3,005 Bytes
b454d73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""Hugging Face asset uploader utilities.

Provides a reusable `upload_assets` function that can be imported by
CLI wrappers or CI, plus a small `main()` so it can still be invoked
as a script for local usage.
"""
from __future__ import annotations

import os
from pathlib import Path
from typing import Iterable, List, Optional

from dotenv import load_dotenv

load_dotenv()

try:
    from huggingface_hub import HfApi, login
except Exception:  # pragma: no cover - runtime environment may not have hf installed
    HfApi = None
    login = None


def upload_assets(
    token: Optional[str],
    repo_id: str,
    repo_type: str = "model",
    paths: Optional[Iterable[Path]] = None,
    root: Optional[Path] = None,
):
    """Upload files to the Hugging Face repo `repo_id`, one result per file.

    Args:
        token: HF token string. When truthy it is passed to `login` and to
            every `upload_file` call; pass None to rely on an existing login.
        repo_id: Target repository identifier, forwarded to `upload_file`.
        repo_type: Repository type forwarded to `upload_file`
            (defaults to "model").
        paths: Files to upload, as `Path` or `str`. Relative entries are
            resolved against `root`. Entries that are not existing regular
            files are skipped. If omitted, every file under
            `data/models/` and `data/vectorizers/` below `root` is used.
        root: Base directory; defaults to the current working directory.
            Each file's location relative to `root` becomes its path in
            the repo.

    Returns:
        A list of `(path, success_bool, message)` tuples, one per file that
        was attempted. The message is "OK" on success, otherwise the text of
        the exception. A file that does not live under `root` is reported as
        a failure instead of aborting the remaining uploads.

    Raises:
        RuntimeError: if `huggingface_hub` could not be imported.
    """
    if HfApi is None:
        raise RuntimeError("huggingface_hub is not installed")

    if token:
        # prefer not to persist credentials to git-credential here
        login(token=token, add_to_git_credential=False)

    api = HfApi()

    root = Path(root or Path.cwd())

    if paths is None:
        candidates = [
            *root.glob("data/models/**/*"),
            *root.glob("data/vectorizers/**/*"),
        ]
    else:
        # Path(p) lets callers pass plain strings. Joining an absolute path
        # onto `root` yields that absolute path unchanged, so only relative
        # entries end up anchored at `root`.
        candidates = [root / Path(p) for p in paths]

    # is_file() is already False for paths that do not exist.
    files = [p for p in candidates if p.is_file()]

    results: List[tuple] = []

    for f in files:
        try:
            # Computed inside the try: relative_to() raises ValueError for a
            # file outside `root`, and that must not abort the whole batch.
            path_in_repo = f.relative_to(root).as_posix()
            api.upload_file(
                path_or_fileobj=str(f),
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                token=token,
            )
        except Exception as exc:
            results.append((f, False, str(exc)))
        else:
            results.append((f, True, "OK"))

    return results


def main() -> int:
    """Upload the default asset set using configuration from the environment.

    Reads `HUGGINGFACE_API_KEY` (token), `HF_ASSETS_REPO` (target repo id,
    required) and `HF_ASSETS_REPO_TYPE` (defaults to "model"), calls
    `upload_assets`, and prints a summary line plus one line per file.

    Returns:
        0 when every attempted upload succeeded or there was nothing to
        upload; 1 when the repo id is missing, `upload_assets` raised, or at
        least one file failed. The value is suitable for `sys.exit`.
    """
    token = os.getenv("HUGGINGFACE_API_KEY")
    repo_id = os.getenv("HF_ASSETS_REPO")
    repo_type = os.getenv("HF_ASSETS_REPO_TYPE") or "model"

    if not repo_id:
        print("HF_ASSETS_REPO not set in environment. Aborting.")
        return 1

    try:
        results = upload_assets(token=token, repo_id=repo_id, repo_type=repo_type)
    except Exception as e:
        print(f"Upload failed: {e}")
        return 1

    if not results:
        print("No files found to upload.")
        return 0

    # Count failures so the summary does not claim success for files that
    # were only attempted.
    failed = sum(1 for _, ok, _ in results if not ok)
    uploaded = len(results) - failed
    print(
        f"Uploaded {uploaded} of {len(results)} candidate files "
        f"to {repo_id} (repo_type={repo_type})"
    )
    for f, ok, msg in results:
        status = "OK" if ok else f"FAILED: {msg}"
        print(f"{f} -> {status}")

    return 1 if failed else 0


if __name__ == "__main__":
    # Hand whatever main() returns to the interpreter as the process exit
    # status; a None return maps to status 0.
    raise SystemExit(main())