Spaces:
Running
Running
| """Hugging Face Hub integration utilities.""" | |
| import re | |
| import os | |
| from typing import Optional | |
| from huggingface_hub import HfApi, create_repo | |
| from dotenv import load_dotenv | |
# Let python-dotenv populate the process environment first, then read the
# Hub access token once at import time (None when the variable is unset).
load_dotenv()
HF_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
def is_valid_repo_name(repo_name: str) -> bool:
    """Check if a repository name is valid for Hugging Face Hub.

    A name is accepted when it is non-empty and consists solely of ASCII
    letters, digits, and the characters ``_``, ``.``, ``/`` and ``-``.

    Args:
        repo_name: The repository name to validate

    Returns:
        True if the name is valid, False otherwise
    """
    # fullmatch (instead of match with a trailing "$") so that a name ending
    # in a newline is rejected: "$" also matches just before a final "\n".
    return re.fullmatch(r'[A-Za-z0-9_./-]+', repo_name) is not None
def create_hf_repo(
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Create a new repository on Hugging Face Hub.

    The name is validated with ``is_valid_repo_name`` before any request is
    made. ``create_repo`` is called with ``exist_ok=True`` and the module's
    ``HF_TOKEN``.

    Args:
        repo_name: Name of the repository to create
        repo_type: Type of repository (dataset, model, or space)
        private: Whether the repository should be private

    Returns:
        The repository ID (the ``repo_name`` that was passed in)

    Raises:
        Exception: If the repository name is invalid or creation fails
    """
    if not is_valid_repo_name(repo_name):
        # Message mirrors what the validator actually accepts (it allows "/").
        raise Exception(
            "Invalid repo name: may only contain letters, digits, and the "
            "characters '-', '_', '.', '/' (no spaces or other special "
            "characters)"
        )

    try:
        create_repo(
            repo_id=repo_name,
            repo_type=repo_type,
            private=private,
            exist_ok=True,
            token=HF_TOKEN,
        )
    except Exception as e:
        # Keep the original error attached as the explicit cause.
        raise Exception(f"Error creating repository: {str(e)}") from e
    return repo_name
def upload_to_hf(
    file_path: str,
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Upload a file to Hugging Face Hub.

    The target repository is created first via ``create_hf_repo``; the file
    is then stored at the repository root under its base name.

    Args:
        file_path: Path to the file to upload
        repo_name: Name of the repository to upload to
        repo_type: Type of repository
        private: Whether the repository should be private

    Returns:
        The repository ID

    Raises:
        Exception: If repository creation or the upload fails
    """
    try:
        # Create or get repository
        repo_id = create_hf_repo(repo_name, repo_type, private)

        # Upload file under its base name, dropping any local directories
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        return repo_id
    except Exception as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") from e
def download_from_hf(
    repo_name: str,
    file_name: str,
    local_path: Optional[str] = None
) -> str:
    """Download a file from a Hugging Face Hub dataset repository.

    The file is fetched from the ``main`` revision of
    ``datasets/{repo_name}`` and written to ``local_path``.

    Args:
        repo_name: Name of the repository to download from
        file_name: Name of the file to download
        local_path: Optional local path to save the file to; defaults to
            ``data/<file_name>``

    Returns:
        Path to the downloaded file

    Raises:
        Exception: If the download fails or the file cannot be written
    """
    try:
        import requests

        # "resolve" serves the actual file content; "raw" would return only
        # the Git LFS pointer for LFS-tracked files.
        file_url = (
            f"https://huggingface.co/datasets/{repo_name}/resolve/main/{file_name}"
        )
        # Send the token when one is configured so private repositories
        # (which upload_to_hf can create) are downloadable too.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None

        # A timeout keeps an unresponsive server from hanging the caller.
        response = requests.get(file_url, headers=headers, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        # Save the file
        if local_path is None:
            local_path = os.path.join("data", file_name)
        target_dir = os.path.dirname(local_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(local_path, "wb") as f:
            f.write(response.content)
        return local_path
    except Exception as e:
        raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}") from e