| """ |
| HuggingFace Space Synchronization Utilities |
| |
| This module handles synchronization between GitHub repositories and HuggingFace Spaces. |
| """ |
|
|
| import os |
| import shutil |
| from pathlib import Path |
| from typing import List, Dict, Any, Optional |
| from huggingface_hub import HfApi, Repository, create_repo |
| from dotenv import load_dotenv |
| import yaml |
|
|
| load_dotenv() |
|
|
|
|
class HFSpaceSync:
    """
    Manages synchronization between local files and HuggingFace Spaces.

    Credentials are read from the environment: ``HF_TOKEN`` is required at
    construction time, and ``HF_USERNAME`` is the namespace under which every
    space is addressed (``<HF_USERNAME>/<space_name>``).
    """

    def __init__(self, config_path: str = "config.yaml"):
        """
        Initialize HuggingFace Space synchronization.

        Args:
            config_path: Path to configuration file

        Raises:
            ValueError: If ``HF_TOKEN`` is not set in the environment.
        """
        self.config = self._load_config(config_path)
        self.hf_token = os.getenv("HF_TOKEN")
        self.hf_username = os.getenv("HF_USERNAME")

        # Validate credentials before building a client that cannot work.
        if not self.hf_token:
            raise ValueError("HF_TOKEN not found in environment variables")

        self.api = HfApi(token=self.hf_token)

    def _load_config(self, config_path: str) -> Dict[str, Any]:
        """
        Load configuration from YAML file.

        Args:
            config_path: Path to the YAML configuration file

        Returns:
            The parsed mapping, or an empty dict when the file is missing,
            empty, or does not contain a mapping at the top level.
        """
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
        except FileNotFoundError:
            print(f"Config file {config_path} not found, using defaults")
            return {}

        # An empty YAML document parses to None; callers rely on dict.get().
        if loaded is None:
            return {}
        if not isinstance(loaded, dict):
            print(f"Config file {config_path} is not a mapping, using defaults")
            return {}
        return loaded

    def _repo_id(self, space_name: str) -> str:
        """
        Build the fully qualified space id ``<username>/<space_name>``.

        Raises:
            ValueError: If ``HF_USERNAME`` was not set in the environment
                (otherwise the id would silently become ``"None/<name>"``).
        """
        if not self.hf_username:
            raise ValueError("HF_USERNAME not found in environment variables")
        return f"{self.hf_username}/{space_name}"

    @staticmethod
    def _is_excluded(path: str, patterns: List[str]) -> bool:
        """
        Tell whether ``path`` matches any exclude pattern.

        A pattern matches when it occurs as a plain substring of ``path``
        or when it matches ``path`` as a shell-style glob such as ``*.pyc``.
        """
        from fnmatch import fnmatchcase

        return any(
            pattern in path or fnmatchcase(path, pattern)
            for pattern in patterns
        )

    def create_space(self, space_name: str, space_type: str = "streamlit",
                     private: bool = False) -> str:
        """
        Create a new HuggingFace Space (no-op if it already exists).

        Args:
            space_name: Name of the space to create
            space_type: Type of space (streamlit, gradio, static)
            private: Whether the space should be private

        Returns:
            Space repository ID

        Raises:
            ValueError: If ``HF_USERNAME`` is not set.
        """
        repo_id = self._repo_id(space_name)
        # exist_ok=True makes creation idempotent instead of relying on the
        # wording of the server's error message to detect an existing space.
        self.api.create_repo(
            repo_id=repo_id,
            repo_type="space",
            space_sdk=space_type,
            private=private,
            exist_ok=True,
        )
        print(f"Space ready: {repo_id}")
        return repo_id

    def upload_files(self, space_name: str, local_path: str,
                     repo_path: str = "", commit_message: str = "Update files") -> None:
        """
        Upload files to a HuggingFace Space.

        Args:
            space_name: Name of the space
            local_path: Local file or directory path
            repo_path: Path in the repository (default: root)
            commit_message: Commit message

        Raises:
            ValueError: If ``HF_USERNAME`` is not set.
            FileNotFoundError: If ``local_path`` is neither a file nor a
                directory.
        """
        repo_id = self._repo_id(space_name)

        try:
            if os.path.isfile(local_path):
                self.api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=repo_path or os.path.basename(local_path),
                    repo_id=repo_id,
                    repo_type="space",
                    commit_message=commit_message,
                )
                print(f"Uploaded {local_path} to {repo_id}")
            elif os.path.isdir(local_path):
                self.api.upload_folder(
                    folder_path=local_path,
                    path_in_repo=repo_path,
                    repo_id=repo_id,
                    repo_type="space",
                    commit_message=commit_message,
                )
                print(f"Uploaded folder {local_path} to {repo_id}")
            else:
                # Previously a missing path was a silent no-op.
                raise FileNotFoundError(f"Local path not found: {local_path}")
        except Exception as e:
            print(f"Error uploading to {repo_id}: {e}")
            raise

    def sync_directory(self, space_name: str, local_dir: str = ".",
                       exclude_patterns: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Synchronize a local directory with a HuggingFace Space.

        Every file under ``local_dir`` is uploaded in its own commit. Files
        that match an exclude pattern, or whose upload fails, are reported
        as skipped rather than aborting the whole sync.

        Args:
            space_name: Name of the space
            local_dir: Local directory to sync
            exclude_patterns: Patterns to exclude from sync. Each pattern is
                matched against the path relative to ``local_dir`` (with
                ``/`` separators), either as a substring or as a glob.
                Defaults to ``sync.exclude_patterns`` from the config.

        Returns:
            Dictionary with sync results

        Raises:
            ValueError: If ``HF_USERNAME`` is not set.
        """
        if exclude_patterns is None:
            sync_cfg = self.config.get("sync") or {}
            exclude_patterns = sync_cfg.get("exclude_patterns") or []
        # An empty pattern is a substring of everything and would exclude all.
        patterns = [pattern for pattern in exclude_patterns if pattern]

        repo_id = self._repo_id(space_name)

        # Any lookup failure triggers creation; create_space is idempotent,
        # so this is harmless when the space does in fact exist.
        try:
            self.api.repo_info(repo_id=repo_id, repo_type="space")
        except Exception:
            print(f"Space {repo_id} doesn't exist, creating...")
            self.create_space(space_name)

        uploaded_files = []
        skipped_files = []

        for root, dirs, files in os.walk(local_dir):
            # Prune excluded directories in place so os.walk never enters them.
            dirs[:] = [d for d in dirs if not self._is_excluded(d, patterns)]

            for file in files:
                file_path = os.path.join(root, file)
                # Hub paths always use "/"; match excludes on the relative
                # path so the location of local_dir itself cannot trigger them.
                rel_path = Path(os.path.relpath(file_path, local_dir)).as_posix()

                if self._is_excluded(rel_path, patterns):
                    skipped_files.append(file_path)
                    continue

                try:
                    self.api.upload_file(
                        path_or_fileobj=file_path,
                        path_in_repo=rel_path,
                        repo_id=repo_id,
                        repo_type="space",
                        commit_message=f"Sync: {rel_path}",
                    )
                except Exception as e:
                    # Best effort: record the failure and keep syncing.
                    print(f"Error syncing {file_path}: {e}")
                    skipped_files.append(file_path)
                else:
                    uploaded_files.append(rel_path)
                    print(f"Synced: {rel_path}")

        return {
            "space": repo_id,
            "uploaded": len(uploaded_files),
            "skipped": len(skipped_files),
            "uploaded_files": uploaded_files,
            "skipped_files": skipped_files,
        }

    def list_spaces(self) -> List[Dict[str, Any]]:
        """
        List all spaces for the authenticated user.

        Returns:
            List of space information dictionaries

        Raises:
            ValueError: If ``HF_USERNAME`` is not set.
        """
        if not self.hf_username:
            raise ValueError("HF_USERNAME not found in environment variables")
        # list_models() only ever returns model repos; Spaces have their own
        # listing endpoint.
        spaces = self.api.list_spaces(author=self.hf_username)
        return [{"id": space.id, "private": space.private} for space in spaces]

    def download_space(self, space_name: str, local_dir: str) -> str:
        """
        Download a space to a local directory.

        Args:
            space_name: Name of the space
            local_dir: Local directory to download to

        Returns:
            Path to downloaded directory

        Raises:
            ValueError: If ``HF_USERNAME`` is not set.
        """
        repo_id = self._repo_id(space_name)

        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id=repo_id,
            repo_type="space",
            local_dir=local_dir,
            token=self.hf_token,
        )

        print(f"Downloaded {repo_id} to {local_dir}")
        return local_dir
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build a client from the environment/config and
    # print the id of every space it reports.
    client = HFSpaceSync()
    print("Available spaces:")
    for space in client.list_spaces():
        print(f" - {space['id']}")
|
|