sentiment-sleuth / src /utils /hf_uploader.py
elsayedelmandoh's picture
create license, hf_loader, and hf_uploader file
b454d73
"""Hugging Face asset uploader utilities.
Provides a reusable `upload_assets` function that can be imported by
CLI wrappers or CI, plus a small `main()` so it can still be invoked
as a script for local usage.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Iterable, List, Optional
from dotenv import load_dotenv
load_dotenv()
try:
from huggingface_hub import HfApi, login
except Exception: # pragma: no cover - runtime environment may not have hf installed
HfApi = None
login = None
def upload_assets(
token: Optional[str],
repo_id: str,
repo_type: str = "model",
paths: Optional[Iterable[Path]] = None,
root: Optional[Path] = None,
):
"""Upload the provided files (Paths) to a Hugging Face `repo_id`.
- `token` is the HF token (string) or None to rely on existing login.
- `paths` is an iterable of Path objects (relative to `root` if provided)
If omitted, defaults to files under `data/models/` and `data/vectorizers/`.
- Returns a list of tuples (path, success_bool, message).
"""
if HfApi is None:
raise RuntimeError("huggingface_hub is not installed")
if token:
# prefer not to persist credentials to git-credential here
login(token=token, add_to_git_credential=False)
api = HfApi()
root = Path(root or Path.cwd())
if paths is None:
candidates = list(root.glob("data/models/**/*")) + list(root.glob("data/vectorizers/**/*"))
files = [p for p in candidates if p.is_file()]
else:
files = [p if p.is_absolute() else (root / p) for p in paths]
files = [p for p in files if p.exists() and p.is_file()]
results: List[tuple] = []
if not files:
return results
for f in files:
path_in_repo = str(f.relative_to(root)).replace("\\", "/")
try:
api.upload_file(
path_or_fileobj=str(f),
path_in_repo=path_in_repo,
repo_id=repo_id,
repo_type=repo_type,
token=token,
)
results.append((f, True, "OK"))
except Exception as exc:
results.append((f, False, str(exc)))
return results
def main():
token = os.getenv("HUGGINGFACE_API_KEY")
repo_id = os.getenv("HF_ASSETS_REPO")
repo_type = os.getenv("HF_ASSETS_REPO_TYPE") or "model"
if not repo_id:
print("HF_ASSETS_REPO not set in environment. Aborting.")
return
try:
results = upload_assets(token=token, repo_id=repo_id, repo_type=repo_type)
except Exception as e:
print(f"Upload failed: {e}")
return
if not results:
print("No files found to upload.")
return
print(f"Uploaded {len(results)} candidate files to {repo_id} (repo_type={repo_type})")
for f, ok, msg in results:
status = "OK" if ok else f"FAILED: {msg}"
print(f"{f} -> {status}")
if __name__ == "__main__":
main()