# NCAkit / modules /shared /services /hf_storage.py
# ismdrobiul489's picture
# Fix: Add ?download=true to HF URLs for direct download
# 51966de
"""
HF Hub Storage Client - Optional cloud storage for videos
Uploads videos to Hugging Face Hub Dataset repository
"""
import logging
from pathlib import Path
from typing import Optional, Union
from urllib.parse import quote
logger = logging.getLogger(__name__)
# huggingface_hub is an optional dependency. Probe for it once at import time
# and record the outcome in HF_HUB_AVAILABLE so the rest of the module can
# degrade gracefully instead of failing on import.
try:
    from huggingface_hub import HfApi, create_repo
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not installed. Cloud storage disabled.")
else:
    HF_HUB_AVAILABLE = True
class HFStorageClient:
    """
    Optional Hugging Face Hub storage for videos.

    When both a repo ID and a token are supplied (the HF_REPO / HF_TOKEN
    settings named in the log messages) and ``huggingface_hub`` is importable,
    videos are uploaded to a Hub *dataset* repository. Otherwise the client is
    disabled and every operation is a no-op (graceful degradation).

    Repository layout:
        your-repo/
            short_video/
                video_id.mp4
            story_reels/
                video_id.mp4
            [future_module]/
                video_id.mp4
    """

    def __init__(self, repo_id: Optional[str] = None, token: Optional[str] = None):
        """
        Initialize the HF storage client.

        Args:
            repo_id: HF repo ID (e.g., "username/ncakit-videos").
            token: HF token with write access.
        """
        # All three conditions must hold; short-circuiting means the library
        # flag is only consulted when credentials were actually provided.
        self.enabled = bool(repo_id and token and HF_HUB_AVAILABLE)
        self.repo_id = repo_id
        self.token = token
        self.api = None

        if self.enabled:
            self._initialize()
        elif repo_id and token:
            # Credentials are present yet the client is disabled, so the only
            # remaining cause is the missing huggingface_hub package.
            logger.warning("HF credentials provided but huggingface_hub not installed")
        else:
            logger.info("HF cloud storage disabled (HF_REPO or HF_TOKEN not set)")

    def _initialize(self):
        """
        Build the HfApi handle and make sure the dataset repo exists.

        A failure while creating/verifying the repo is only logged (the client
        stays enabled); a failure while building the API handle disables the
        client.
        """
        try:
            self.api = HfApi(token=self.token)
            # Create repo if not exists (private for security)
            try:
                create_repo(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    token=self.token,
                    exist_ok=True,
                    private=True,  # Private repo - uses HF_TOKEN for access
                )
                logger.info(f"HF storage enabled: {self.repo_id}")
            except Exception as e:
                # Best effort: the repo may already exist or be managed elsewhere.
                logger.warning(f"Could not create/verify repo: {e}")
        except Exception as e:
            logger.error(f"Failed to initialize HF storage: {e}")
            self.enabled = False

    @staticmethod
    def _path_in_repo(video_id: str, folder: str) -> str:
        """Return the repo-relative path of a video: ``<folder>/<video_id>.mp4``."""
        return f"{folder}/{video_id}.mp4"

    def upload_video(
        self,
        local_path: Union[str, Path],
        video_id: str,
        folder: str = "videos",
    ) -> Optional[str]:
        """
        Upload a video to the HF Hub.

        Args:
            local_path: Path to the local video file (``Path`` or ``str``).
            video_id: Unique video ID; becomes the file name in the repo.
            folder: Folder name (e.g., "short_video", "story_reels").

        Returns:
            Direct-download URL of the uploaded file if successful, None when
            the client is disabled, the file is missing, or the upload fails.
            If the repo is private (as it is when created by this client),
            fetching the URL requires an authorized token.
        """
        if not self.enabled:
            return None

        # Accept plain strings as well as Path objects.
        local_path = Path(local_path)
        # is_file() also rejects directories, which cannot be uploaded as a file.
        if not local_path.is_file():
            logger.error(f"Video file not found: {local_path}")
            return None

        path_in_repo = self._path_in_repo(video_id, folder)
        try:
            self.api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=path_in_repo,
                repo_id=self.repo_id,
                repo_type="dataset",
            )
        except Exception as e:
            logger.error(f"Failed to upload to HF Hub: {e}")
            return None

        # Percent-encode the path so IDs containing spaces, '#', '?' etc. still
        # yield a valid link; '/' is left intact. ?download=true requests a
        # direct download.
        public_url = (
            f"https://huggingface.co/datasets/{self.repo_id}"
            f"/resolve/main/{quote(path_in_repo)}?download=true"
        )
        logger.info(f"Uploaded to HF: {public_url}")
        return public_url

    def delete_video(self, video_id: str, folder: str = "videos") -> bool:
        """
        Delete a video from the HF Hub.

        Args:
            video_id: Video ID to delete.
            folder: Folder name the video was uploaded under.

        Returns:
            True if the file was deleted, False when the client is disabled or
            the deletion fails.
        """
        if not self.enabled:
            return False

        path_in_repo = self._path_in_repo(video_id, folder)
        try:
            self.api.delete_file(
                path_in_repo=path_in_repo,
                repo_id=self.repo_id,
                repo_type="dataset",
            )
        except Exception as e:
            logger.error(f"Failed to delete from HF Hub: {e}")
            return False

        logger.info(f"Deleted from HF: {path_in_repo}")
        return True
# Module-level singleton holding the shared HFStorageClient. It stays None
# until init_hf_storage() assigns it (per the original note, that call is
# expected to happen in modules/__init__.py or at app startup).
_hf_storage: Optional[HFStorageClient] = None
def get_hf_storage() -> Optional[HFStorageClient]:
    """Return the global HF storage client, or None if none has been set."""
    client = _hf_storage
    return client
def init_hf_storage(repo_id: str = None, token: str = None) -> HFStorageClient:
    """
    Create the global HF storage client and return it.

    A new HFStorageClient is built from the given repo ID and token and
    replaces whatever instance was previously stored globally.
    """
    global _hf_storage
    client = HFStorageClient(repo_id=repo_id, token=token)
    _hf_storage = client
    return client