| """ |
| HF Hub Storage Client - Optional cloud storage for videos |
| Uploads videos to Hugging Face Hub Dataset repository |
| """ |
| import logging |
| from pathlib import Path |
| from typing import Optional |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
| try: |
| from huggingface_hub import HfApi, create_repo |
| HF_HUB_AVAILABLE = True |
| except ImportError: |
| HF_HUB_AVAILABLE = False |
| logger.warning("huggingface_hub not installed. Cloud storage disabled.") |
|
|
|
|
class HFStorageClient:
    """
    Optional Hugging Face Hub storage for videos.

    When both a repo ID and a token are supplied and ``huggingface_hub`` is
    importable, videos are uploaded to a Hub *dataset* repository. Otherwise
    the client is disabled and every operation returns a "nothing happened"
    value instead of raising (graceful degradation).

    Repository layout:
    - your-repo/
      - short_video/
        - video_id.mp4
      - story_reels/
        - video_id.mp4
      - [future_module]/
        - video_id.mp4
    """

    def __init__(self, repo_id: Optional[str] = None, token: Optional[str] = None):
        """
        Initialize HF storage client.

        Args:
            repo_id: HF repo ID (e.g., "username/ncakit-videos")
            token: HF token with write access
        """
        self.enabled = bool(repo_id and token and HF_HUB_AVAILABLE)
        self.repo_id = repo_id
        self.token = token
        self.api = None  # set by _initialize() when the client is enabled

        if self.enabled:
            self._initialize()
        elif repo_id and token:
            # Credentials are present, so the only reason to be disabled is
            # the missing huggingface_hub package.
            logger.warning("HF credentials provided but huggingface_hub not installed")
        else:
            logger.info("HF cloud storage disabled (HF_REPO or HF_TOKEN not set)")

    def _initialize(self):
        """
        Create the API handle and make sure the dataset repo exists.

        A failure to build the API handle disables the client. A failure to
        create/verify the repo is only logged: the client stays enabled and
        later uploads are still attempted.
        """
        try:
            self.api = HfApi(token=self.token)
        except Exception as e:
            logger.error(f"Failed to initialize HF storage: {e}")
            self.enabled = False
            return

        try:
            create_repo(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
                exist_ok=True,
                private=True,
            )
        except Exception as e:
            logger.warning(f"Could not create/verify repo: {e}")
        else:
            logger.info(f"HF storage enabled: {self.repo_id}")

    @staticmethod
    def _path_in_repo(folder: str, video_id: str) -> str:
        """Return the repo-relative path shared by upload and delete."""
        return f"{folder}/{video_id}.mp4"

    def upload_video(
        self,
        local_path: Path,
        video_id: str,
        folder: str = "videos"
    ) -> Optional[str]:
        """
        Upload video to HF Hub.

        Args:
            local_path: Path to local video file (a plain string path is
                accepted as well)
            video_id: Unique video ID
            folder: Folder name (e.g., "short_video", "story_reels")

        Returns:
            The ``resolve/main`` download URL of the uploaded file, or None
            when the client is disabled, the file is missing, or the upload
            fails. This client creates its repo as private, so the URL may
            require an authenticated request.
        """
        if not self.enabled:
            return None

        # Normalise to Path so a string argument does not raise
        # AttributeError on the check below.
        local_path = Path(local_path)
        if not local_path.is_file():
            logger.error(f"Video file not found: {local_path}")
            return None

        path_in_repo = self._path_in_repo(folder, video_id)

        try:
            self.api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=path_in_repo,
                repo_id=self.repo_id,
                repo_type="dataset",
            )
        except Exception as e:
            logger.error(f"Failed to upload to HF Hub: {e}")
            return None

        public_url = f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{path_in_repo}?download=true"
        logger.info(f"Uploaded to HF: {public_url}")
        return public_url

    def delete_video(self, video_id: str, folder: str = "videos") -> bool:
        """
        Delete video from HF Hub.

        Args:
            video_id: Video ID to delete
            folder: Folder name

        Returns:
            True if the file was deleted; False when the client is disabled
            or the Hub call fails.
        """
        if not self.enabled:
            return False

        path_in_repo = self._path_in_repo(folder, video_id)

        try:
            self.api.delete_file(
                path_in_repo=path_in_repo,
                repo_id=self.repo_id,
                repo_type="dataset",
            )
        except Exception as e:
            logger.error(f"Failed to delete from HF Hub: {e}")
            return False

        logger.info(f"Deleted from HF: {path_in_repo}")
        return True
|
|
|
|
| |
# Module-level client instance; remains None until init_hf_storage() runs.
_hf_storage: Optional[HFStorageClient] = None


def get_hf_storage() -> Optional[HFStorageClient]:
    """Return the global HF storage client, or None if none has been initialized."""
    return _hf_storage
|
|
|
|
def init_hf_storage(
    repo_id: Optional[str] = None,
    token: Optional[str] = None,
) -> HFStorageClient:
    """
    Create the global HF storage client and return it.

    Any previously stored global client is replaced. A client is returned
    even when the arguments are missing; in that case its ``enabled`` flag
    is False.

    Args:
        repo_id: HF repo ID, or None to leave cloud storage disabled
        token: HF token, or None to leave cloud storage disabled

    Returns:
        The newly created client (also retrievable via get_hf_storage()).
    """
    global _hf_storage
    _hf_storage = HFStorageClient(repo_id=repo_id, token=token)
    return _hf_storage
|
|