Add torch.hub and HuggingFace Hub support

Files changed (6) hide show

README_HF.md +92 -0
chiluka/__init__.py +37 -1
chiluka/hub.py +347 -0
chiluka/inference.py +61 -0
hubconf.py +84 -0
setup.py +1 -0

README_HF.md ADDED Viewed

	@@ -0,0 +1,92 @@

+---
+language:
+  - en
+  - te
+  - hi
+license: mit
+library_name: chiluka
+tags:
+  - text-to-speech
+  - tts
+  - styletts2
+  - voice-cloning
+---
+# Chiluka TTS
+Chiluka (చిలుక - Telugu for "parrot") is a lightweight Text-to-Speech model based on StyleTTS2.
+## Installation
+```bash
+pip install chiluka
+```
+Or install from source:
+```bash
+pip install git+https://github.com/Seemanth/chiluka.git
+```
+## Usage
+### Quick Start (Auto-download)
+```python
+from chiluka import Chiluka
+# Automatically downloads model weights
+tts = Chiluka.from_pretrained()
+# Generate speech
+wav = tts.synthesize(
+    text="Hello, world!",
+    reference_audio="path/to/reference.wav",
+    language="en"
+)
+# Save output
+tts.save_wav(wav, "output.wav")
+```
+### PyTorch Hub
+```python
+import torch
+tts = torch.hub.load('Seemanth/chiluka', 'chiluka')
+wav = tts.synthesize("Hello!", "reference.wav", language="en")
+```
+### HuggingFace Hub
+```python
+from chiluka import Chiluka
+tts = Chiluka.from_pretrained("Seemanth/chiluka-tts")
+```
+## Parameters
+- `text`: Input text to synthesize
+- `reference_audio`: Path to reference audio for style transfer
+- `language`: espeak-ng language code ('en', 'te', 'hi', etc.; see Supported Languages below)
+- `alpha`: Acoustic style mixing (0-1, default 0.3)
+- `beta`: Prosodic style mixing (0-1, default 0.7)
+- `diffusion_steps`: Number of diffusion steps; trades quality against speed (default 5)
+## Supported Languages
+Uses espeak-ng phonemizer. Common languages:
+- English: `en-us`, `en-gb`
+- Telugu: `te`
+- Hindi: `hi`
+- Tamil: `ta`
+## License
+MIT License
+## Citation
+Based on StyleTTS2 by Yinghao Aaron Li et al.

chiluka/__init__.py CHANGED Viewed

@@ -1,9 +1,45 @@
 """
 Chiluka - A lightweight TTS inference package based on StyleTTS2
 """
 __version__ = "0.1.0"
 from .inference import Chiluka
-__all__ = ["Chiluka"]

 """
 Chiluka - A lightweight TTS inference package based on StyleTTS2
+Usage:
+    # Local weights (if you have them)
+    from chiluka import Chiluka
+    tts = Chiluka()
+    # Auto-download from HuggingFace Hub (recommended)
+    from chiluka import Chiluka
+    tts = Chiluka.from_pretrained()
+    # From specific HuggingFace repo
+    tts = Chiluka.from_pretrained("username/model-name")
+    # Generate speech
+    wav = tts.synthesize(
+        text="Hello, world!",
+        reference_audio="reference.wav",
+        language="en"
+    )
+    tts.save_wav(wav, "output.wav")
 """
 __version__ = "0.1.0"
 from .inference import Chiluka
+from .hub import (
+    download_from_hf,
+    push_to_hub,
+    clear_cache,
+    get_cache_dir,
+    create_model_card,
+    DEFAULT_HF_REPO,
+)
+# Public API of the package: the TTS class plus the hub helpers imported above.
+__all__ = [
+    "Chiluka",
+    "download_from_hf",
+    "push_to_hub",
+    "clear_cache",
+    "get_cache_dir",
+    "create_model_card",
+    "DEFAULT_HF_REPO",
+]

chiluka/hub.py ADDED Viewed

	@@ -0,0 +1,347 @@

+"""
+Hub utilities for downloading and managing Chiluka TTS models.
+Supports:
+- HuggingFace Hub integration
+- Automatic model downloading
+- Local caching
+"""
+import os
+import shutil
+from pathlib import Path
+from typing import Optional, Union
+# Default HuggingFace Hub repository, used whenever a caller does not pass a
+# repo_id. This is the repository documented in README_HF.md; the previous
+# placeholder value could not be resolved on the Hub.
+DEFAULT_HF_REPO = "Seemanth/chiluka-tts"
+# Default cache directory for downloaded models (get_cache_dir also honours
+# the CHILUKA_CACHE environment variable).
+CACHE_DIR = Path.home() / ".cache" / "chiluka"
+# Files, relative to a model directory, that must all be present for a cached
+# model to be considered complete.
+REQUIRED_FILES = {
+    "checkpoint": "checkpoints/epoch_2nd_00017.pth",
+    "config": "configs/config_ft.yml",
+    "asr_config": "pretrained/ASR/config.yml",
+    "asr_model": "pretrained/ASR/epoch_00080.pth",
+    "f0_model": "pretrained/JDC/bst.t7",
+    "plbert_config": "pretrained/PLBERT/config.yml",
+    "plbert_model": "pretrained/PLBERT/step_1000000.t7",
+}
+def get_cache_dir() -> Path:
+    """
+    Return the cache directory for Chiluka models, creating it if needed.
+    The CHILUKA_CACHE environment variable overrides CACHE_DIR when it is set
+    to a non-empty value. A leading "~" is expanded, so a value such as
+    "~/models" does not create a literal "~" directory under the current
+    working directory.
+    Returns:
+        Path to the (existing) cache directory
+    """
+    # "or" rather than a .get() default so an empty variable counts as unset.
+    cache_dir = Path(os.environ.get("CHILUKA_CACHE") or CACHE_DIR).expanduser()
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    return cache_dir
+def is_model_cached(repo_id: str = DEFAULT_HF_REPO) -> bool:
+    """Report whether every required file for repo_id is in the local cache."""
+    model_root = get_cache_dir() / repo_id.replace("/", "_")
+    # A missing model directory short-circuits before any per-file check.
+    return model_root.exists() and all(
+        (model_root / relative).exists() for relative in REQUIRED_FILES.values()
+    )
+def download_from_hf(
+    repo_id: str = DEFAULT_HF_REPO,
+    revision: str = "main",
+    force_download: bool = False,
+    token: Optional[str] = None,
+) -> Path:
+    """
+    Download model files from HuggingFace Hub.
+    Args:
+        repo_id: HuggingFace Hub repository ID (e.g., 'username/model-name')
+        revision: Git revision to download (branch, tag, or commit hash)
+        force_download: If True, re-download even if cached
+        token: HuggingFace API token for private repos
+    Returns:
+        Path to the downloaded model directory
+    Raises:
+        ImportError: If huggingface_hub is not installed
+    Example:
+        >>> model_path = download_from_hf("yourusername/chiluka-tts")
+        >>> print(model_path)
+        /home/user/.cache/chiluka/yourusername_chiluka-tts
+    """
+    try:
+        from huggingface_hub import snapshot_download
+    except ImportError as err:
+        raise ImportError(
+            "huggingface_hub is required for downloading models. "
+            "Install with: pip install huggingface_hub"
+        ) from err
+    cache_path = get_cache_dir() / repo_id.replace("/", "_")
+    # NOTE: the local cache is keyed by repo_id only, so a complete cached copy
+    # is reused regardless of `revision`; pass force_download=True to switch.
+    if is_model_cached(repo_id) and not force_download:
+        print(f"Using cached model from {cache_path}")
+        return cache_path
+    print(f"Downloading model from HuggingFace Hub: {repo_id}...")
+    # Download entire repository
+    downloaded_path = snapshot_download(
+        repo_id=repo_id,
+        revision=revision,
+        cache_dir=get_cache_dir() / "hf_cache",
+        token=token,
+        local_dir=cache_path,
+        local_dir_use_symlinks=False,
+        # Forward the flag so files already held by huggingface_hub's own
+        # cache are fetched again instead of being silently reused.
+        force_download=force_download,
+    )
+    print(f"Model downloaded to {cache_path}")
+    return Path(downloaded_path)
+def download_from_url(
+    url: str,
+    filename: str,
+    force_download: bool = False,
+) -> Path:
+    """
+    Download a single file from a URL into the Chiluka cache.
+    The data is written to a temporary ".part" file and only moved to its
+    final name once the transfer has finished, so an interrupted download is
+    not mistaken for a cached file on the next call.
+    Args:
+        url: URL to download from
+        filename: Local filename to save as
+        force_download: If True, re-download even if exists
+    Returns:
+        Path to the downloaded file
+    """
+    import urllib.request
+    cache_dir = get_cache_dir() / "downloads"
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    local_path = cache_dir / filename
+    if local_path.exists() and not force_download:
+        print(f"Using cached file: {local_path}")
+        return local_path
+    print(f"Downloading {filename}...")
+    # Download with progress
+    def _progress_hook(count, block_size, total_size):
+        # The total size is not positive when the server does not report one;
+        # no percentage can be computed then (and 0 would divide by zero).
+        if total_size <= 0:
+            return
+        # The final block is usually partial, so clamp to 100%.
+        percent = min(100, int(count * block_size * 100 / total_size))
+        print(f"\rDownloading: {percent}%", end="", flush=True)
+    partial_path = local_path.with_name(local_path.name + ".part")
+    try:
+        urllib.request.urlretrieve(url, partial_path, reporthook=_progress_hook)
+        partial_path.replace(local_path)
+    finally:
+        # Still present only if the download or the rename failed.
+        if partial_path.exists():
+            partial_path.unlink()
+    print()  # New line after progress
+    return local_path
+def get_model_paths(repo_id: str = DEFAULT_HF_REPO) -> dict:
+    """
+    Resolve the model file locations for a repository, downloading it first
+    if it is not already cached.
+    Args:
+        repo_id: HuggingFace Hub repository ID
+    Returns:
+        Dictionary with the keys "config_path", "checkpoint_path" and
+        "pretrained_dir", each mapped to a string path
+    """
+    root = download_from_hf(repo_id)
+    config_file = root / "configs" / "config_ft.yml"
+    checkpoint_file = root / "checkpoints" / "epoch_2nd_00017.pth"
+    pretrained_root = root / "pretrained"
+    return dict(
+        config_path=str(config_file),
+        checkpoint_path=str(checkpoint_file),
+        pretrained_dir=str(pretrained_root),
+    )
+def clear_cache(repo_id: Optional[str] = None):
+    """
+    Clear cached models.
+    Args:
+        repo_id: If specified, only clear cache for this repo.
+                If None, clear entire cache.
+    Raises:
+        ValueError: If repo_id is given but does not name a directory inside
+                the cache (empty string, "." or "..")
+    """
+    cache_dir = get_cache_dir()
+    # Compare against None explicitly: an empty string must not fall through
+    # to the "clear everything" branch.
+    if repo_id is not None:
+        dir_name = repo_id.replace("/", "_")
+        # These names would resolve to the cache root itself or to its parent,
+        # so removing them would delete far more than one model.
+        if dir_name in ("", ".", ".."):
+            raise ValueError(f"Invalid repo_id: {repo_id!r}")
+        cache_path = cache_dir / dir_name
+        if cache_path.exists():
+            shutil.rmtree(cache_path)
+            print(f"Cleared cache for {repo_id}")
+    else:
+        if cache_dir.exists():
+            shutil.rmtree(cache_dir)
+            print("Cleared entire Chiluka cache")
+def push_to_hub(
+    local_dir: str,
+    repo_id: str,
+    token: Optional[str] = None,
+    private: bool = False,
+    commit_message: str = "Upload Chiluka TTS model",
+):
+    """
+    Push a local model to HuggingFace Hub.
+    Args:
+        local_dir: Local directory containing model files
+        repo_id: Target HuggingFace Hub repository ID
+        token: HuggingFace API token (or set HF_TOKEN env var)
+        private: Whether to create a private repository
+        commit_message: Commit message for the upload
+    Raises:
+        ImportError: If huggingface_hub is not installed
+        ValueError: If local_dir is not an existing directory
+    Example:
+        >>> push_to_hub(
+        ...     local_dir="./chiluka",
+        ...     repo_id="myusername/my-chiluka-model",
+        ...     private=False
+        ... )
+    """
+    try:
+        from huggingface_hub import HfApi, create_repo
+    except ImportError as err:
+        raise ImportError(
+            "huggingface_hub is required for pushing models. "
+            "Install with: pip install huggingface_hub"
+        ) from err
+    # Validate before touching the Hub so a mistyped path does not leave an
+    # empty repository behind.
+    if not os.path.isdir(local_dir):
+        raise ValueError(f"local_dir is not a directory: {local_dir}")
+    api = HfApi(token=token)
+    # Create repo if it doesn't exist. This step is best effort: a failure is
+    # reported and the upload below is still attempted.
+    try:
+        create_repo(repo_id, private=private, token=token, exist_ok=True)
+    except Exception as e:
+        print(f"Note: {e}")
+    # Upload folder
+    print(f"Uploading to {repo_id}...")
+    api.upload_folder(
+        folder_path=local_dir,
+        repo_id=repo_id,
+        commit_message=commit_message,
+        # Patterns are matched against file paths, so a bare directory name
+        # does not exclude the files inside it; the wildcard forms do.
+        ignore_patterns=[
+            "*.pyc",
+            "__pycache__",
+            "__pycache__/*",
+            "*/__pycache__/*",
+            "*.egg-info",
+            "*.egg-info/*",
+            ".git",
+            ".git/*",
+        ],
+    )
+    print(f"Model uploaded to: https://huggingface.co/{repo_id}")
+def create_model_card(repo_id: str, save_path: Optional[str] = None) -> str:
+    """
+    Generate a model card (README.md) for HuggingFace Hub.
+    The part of repo_id before the first "/" is used as the owner in the
+    GitHub install URL and in the torch.hub example.
+    Args:
+        repo_id: Repository ID for the model
+        save_path: If provided, save the model card to this path
+    Returns:
+        Model card content as string
+    """
+    owner = repo_id.split('/')[0]
+    model_card = f"""---
+language:
+  - en
+  - te
+  - hi
+license: mit
+library_name: chiluka
+tags:
+  - text-to-speech
+  - tts
+  - styletts2
+  - voice-cloning
+---
+# Chiluka TTS
+Chiluka (చిలుక - Telugu for "parrot") is a lightweight Text-to-Speech model based on StyleTTS2.
+## Installation
+```bash
+pip install chiluka
+```
+Or install from source:
+```bash
+pip install git+https://github.com/{owner}/chiluka.git
+```
+## Usage
+### Quick Start (Auto-download)
+```python
+from chiluka import Chiluka
+# Automatically downloads model weights
+tts = Chiluka.from_pretrained()
+# Generate speech
+wav = tts.synthesize(
+    text="Hello, world!",
+    reference_audio="path/to/reference.wav",
+    language="en"
+)
+# Save output
+tts.save_wav(wav, "output.wav")
+```
+### PyTorch Hub
+```python
+import torch
+tts = torch.hub.load('{owner}/chiluka', 'chiluka')
+wav = tts.synthesize("Hello!", "reference.wav", language="en")
+```
+### HuggingFace Hub
+```python
+from chiluka import Chiluka
+tts = Chiluka.from_pretrained("{repo_id}")
+```
+## Parameters
+- `text`: Input text to synthesize
+- `reference_audio`: Path to reference audio for style transfer
+- `language`: Language code ('en', 'te', 'hi', etc.)
+- `alpha`: Acoustic style mixing (0-1, default 0.3)
+- `beta`: Prosodic style mixing (0-1, default 0.7)
+- `diffusion_steps`: Quality vs speed tradeoff (default 5)
+## Supported Languages
+Uses espeak-ng phonemizer. Common languages:
+- English: `en-us`, `en-gb`
+- Telugu: `te`
+- Hindi: `hi`
+- Tamil: `ta`
+## License
+MIT License
+## Citation
+Based on StyleTTS2 by Yinghao Aaron Li et al.
+"""
+    if save_path:
+        # The card contains non-ASCII (Telugu) text, so write it as UTF-8
+        # instead of relying on the platform's default encoding.
+        with open(save_path, "w", encoding="utf-8") as f:
+            f.write(model_card)
+        print(f"Model card saved to {save_path}")
+    return model_card

chiluka/inference.py CHANGED Viewed

@@ -152,6 +152,67 @@ class Chiluka:
         print("✓ Chiluka TTS initialized successfully!")
     def _verify_pretrained_models(self, asr_path, f0_path, plbert_dir):
         """Verify all pretrained models exist."""
         missing = []

         print("✓ Chiluka TTS initialized successfully!")
+    @classmethod
+    def from_pretrained(
+        cls,
+        repo_id: Optional[str] = None,
+        device: Optional[str] = None,
+        force_download: bool = False,
+        token: Optional[str] = None,
+        **kwargs,
+    ) -> "Chiluka":
+        """
+        Load Chiluka TTS from HuggingFace Hub or with auto-downloaded weights.
+        This is the recommended way to load Chiluka when you don't have local weights.
+        Weights are automatically downloaded and cached on first use.
+        Args:
+            repo_id: HuggingFace Hub repository ID (e.g., 'username/chiluka-tts').
+                    If None, uses the default repository.
+            device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
+            force_download: If True, re-download even if cached.
+            token: HuggingFace API token for private repositories.
+            **kwargs: Additional arguments passed to Chiluka constructor.
+        Returns:
+            Initialized Chiluka TTS model ready for inference.
+        Examples:
+            # Default repository (auto-download)
+            >>> tts = Chiluka.from_pretrained()
+            # Specific repository
+            >>> tts = Chiluka.from_pretrained("myuser/my-chiluka-model")
+            # Force re-download
+            >>> tts = Chiluka.from_pretrained(force_download=True)
+            # Private repository
+            >>> tts = Chiluka.from_pretrained("myuser/private-model", token="hf_xxx")
+        """
+        from .hub import download_from_hf, DEFAULT_HF_REPO, REQUIRED_FILES
+        repo_id = repo_id or DEFAULT_HF_REPO
+        # Download model files (or use cache)
+        model_dir = download_from_hf(
+            repo_id=repo_id,
+            force_download=force_download,
+            token=token,
+        )
+        # Build the paths from the directory returned above. Resolving them
+        # through a second hub call would drop `token`, which can fail for
+        # private repositories.
+        return cls(
+            config_path=str(model_dir / REQUIRED_FILES["config"]),
+            checkpoint_path=str(model_dir / REQUIRED_FILES["checkpoint"]),
+            pretrained_dir=str(model_dir / "pretrained"),
+            device=device,
+            **kwargs,
+        )
     def _verify_pretrained_models(self, asr_path, f0_path, plbert_dir):
         """Verify all pretrained models exist."""
         missing = []

hubconf.py ADDED Viewed

	@@ -0,0 +1,84 @@

+"""
+PyTorch Hub configuration for Chiluka TTS.
+Usage:
+    import torch
+    # Load the model
+    tts = torch.hub.load('Seemanth/chiluka', 'chiluka')
+    # Or with force reload
+    tts = torch.hub.load('Seemanth/chiluka', 'chiluka', force_reload=True)
+    # Generate speech
+    wav = tts.synthesize(
+        text="Hello, world!",
+        reference_audio="path/to/reference.wav",
+        language="en"
+    )
+"""
+# torch.hub checks each entry by looking it up as an importable module, so
+# these must be import names rather than PyPI distribution names
+# (PyYAML is imported as `yaml`, einops-exts as `einops_exts`).
+dependencies = [
+    'torch',
+    'torchaudio',
+    'transformers',
+    'librosa',
+    'phonemizer',
+    'nltk',
+    'yaml',
+    'munch',
+    'einops',
+    'einops_exts',
+    'numpy',
+    'scipy',
+    'huggingface_hub',
+]
+def chiluka(pretrained: bool = True, device: str = None, **kwargs):
+    """
+    torch.hub entry point that builds a Chiluka TTS model.
+    Args:
+        pretrained: If True, load through Chiluka.from_pretrained, which
+                   auto-downloads the weights. If False, call the Chiluka
+                   constructor directly, which expects local weights.
+        device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
+        **kwargs: Forwarded to Chiluka.from_pretrained or to the Chiluka
+                   constructor, depending on `pretrained`.
+    Returns:
+        Chiluka: The constructed TTS model.
+    Example:
+        >>> import torch
+        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka')
+        >>> wav = tts.synthesize("Hello!", "reference.wav", language="en")
+    """
+    from chiluka import Chiluka
+    if not pretrained:
+        # Plain constructor path: nothing is downloaded from here.
+        return Chiluka(device=device, **kwargs)
+    # Default path: from_pretrained fetches (or reuses cached) weights.
+    return Chiluka.from_pretrained(device=device, **kwargs)
+def chiluka_from_hf(repo_id: str = None, device: str = None, **kwargs):
+    """
+    Load Chiluka TTS from a specific HuggingFace Hub repository.
+    Args:
+        repo_id: HuggingFace Hub repository ID (e.g., 'username/model-name').
+                If None, Chiluka.from_pretrained falls back to the package's
+                default repository, so the default lives in one place only.
+        device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
+        **kwargs: Additional arguments passed to Chiluka.from_pretrained.
+    Returns:
+        Chiluka: Initialized TTS model ready for inference.
+    Example:
+        >>> import torch
+        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_from_hf',
+        ...                       repo_id='myuser/my-custom-chiluka')
+    """
+    from chiluka import Chiluka
+    return Chiluka.from_pretrained(repo_id=repo_id, device=device, **kwargs)

setup.py CHANGED Viewed

@@ -43,6 +43,7 @@ setup(
         "einops-exts>=0.0.4",
         "numpy>=1.21.0",
         "scipy>=1.7.0",
     ],
     extras_require={
         "playback": ["pyaudio>=0.2.11"],

         "einops-exts>=0.0.4",
         "numpy>=1.21.0",
         "scipy>=1.7.0",
+        "huggingface_hub>=0.16.0",
     ],
     extras_require={
         "playback": ["pyaudio>=0.2.11"],