# sentiment-sleuth/src/utils/hf_loader.py
# elsayedelmandoh's picture
# create license, hf_loader, and hf_uploader file
# b454d73
from pathlib import Path
import os
import joblib
from src.config import settings
# Single source of truth: the cache directory and the asset list both come from settings.
CACHE_DIR = Path(settings.ASSET_CACHE_DIR)
ASSET_PATHS = [*settings.ASSET_PATHS]
def _load_or_none(path, description):
    """Load ``path`` with joblib; print the outcome and return None on failure."""
    try:
        loaded = joblib.load(path)
    except Exception as e:
        print(f"Failed to load {description} {path}: {e}")
        return None
    print(f"Loaded {description}: {path}")
    return loaded


def _download_and_load(hf_hub_download, repo_id, repo_type, filename, cache_target):
    """Download ``filename`` from the Hub, copy it to ``cache_target`` and load it.

    Returns the loaded object, or None (after printing the reason) when the
    download or the load fails. Failing to write the cache copy is reported
    but is not fatal: the asset is then loaded from the downloaded path.
    """
    import shutil

    try:
        print(f"Attempting to download '{filename}' from HF repo '{repo_id}'...")
        kwargs = {"repo_id": repo_id, "filename": filename}
        if repo_type:
            kwargs["repo_type"] = repo_type
        downloaded = Path(hf_hub_download(**kwargs))

        load_from = downloaded
        try:
            # Copy, never move: the path returned by hf_hub_download is usually
            # a relative symlink inside the HF cache. Renaming it would leave a
            # dangling link here and remove the entry from the HF cache.
            # Staging + rename keeps the cache file whole and also replaces a
            # dangling link left behind at cache_target by an earlier run.
            staging = cache_target.with_name(cache_target.name + ".tmp")
            shutil.copy2(downloaded, staging)
            staging.replace(cache_target)
            load_from = cache_target
        except OSError as e:
            print(f"Could not cache '{filename}' at {cache_target}: {e}")

        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point HF_ASSETS_REPO at a repository you trust.
        loaded = joblib.load(load_from)
        print(f"Downloaded and loaded asset: {load_from}")
        return loaded
    except Exception as e:
        print(f"Failed to download/load '{filename}' from HF Hub: {e}")
        return None


def load_assets_hf():
    """Minimal asset loader that prefers local files and can download from Hugging Face Hub.

    For every path in ``ASSET_PATHS`` the sources are tried in this order:

    1. the path itself;
    2. a file with the same name inside ``CACHE_DIR``;
    3. a download from the repo named by the ``HF_ASSETS_REPO`` environment
       variable (optionally typed by ``HF_ASSETS_REPO_TYPE``), provided
       ``huggingface_hub`` can be imported.

    Progress and failures are reported with ``print``; nothing is raised for
    a missing or unreadable asset.

    Returns:
        A tuple with one entry per item of ``ASSET_PATHS``, in order: the
        loaded object, or None when the asset could not be loaded.
    """
    # huggingface_hub is optional; any import failure just disables downloads.
    try:
        from huggingface_hub import hf_hub_download  # type: ignore
    except Exception:
        hf_hub_download = None

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    hf_repo = os.environ.get('HF_ASSETS_REPO')
    hf_repo_type = os.environ.get('HF_ASSETS_REPO_TYPE') or None

    assets = []
    for p in ASSET_PATHS:
        local_path = Path(p)
        filename = local_path.name
        cached_path = CACHE_DIR / filename

        loaded = None
        if local_path.exists():
            loaded = _load_or_none(local_path, "local asset")
        else:
            print(f"Local asset not found: {local_path}")

        # Reuse a previously cached download so the asset is still available
        # when the Hub is unreachable or not configured.
        if loaded is None and cached_path != local_path and cached_path.exists():
            loaded = _load_or_none(cached_path, "cached asset")

        if loaded is None:
            if hf_hub_download is None:
                print("huggingface_hub not installed; skipping HF download attempts.")
            elif not hf_repo:
                print("Environment variable 'HF_ASSETS_REPO' not set; skipping HF download attempts.")
            else:
                loaded = _download_and_load(
                    hf_hub_download, hf_repo, hf_repo_type, filename, cached_path
                )

        assets.append(loaded)
    return tuple(assets)