File size: 3,379 Bytes

60fee7c
 
 
 
 
 
10ea2f8
 
60fee7c
10ea2f8
 
60fee7c
 
 
 
 
393129e
60fee7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ea2f8
60fee7c
 
 
 
 
 
 
 
 
 
 
10ea2f8
393129e
60fee7c
 
 
 
10ea2f8
60fee7c
 
 
 
10ea2f8
60fee7c
10ea2f8
60fee7c
 
10ea2f8
60fee7c
 
 
 
 
 
 
 
10ea2f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7189a0b
10ea2f8
 
 
 
 
 
 
 
 
 
 
60fee7c
 
 
10ea2f8

"""
PyTorch Hub configuration for Chiluka TTS.

Usage:
    import torch

    # Load Hindi-English model (default)
    tts = torch.hub.load('Seemanth/chiluka', 'chiluka')

    # Load Telugu model
    tts = torch.hub.load('Seemanth/chiluka', 'chiluka_telugu')

    # Generate speech
    wav = tts.synthesize(
        text="Hello, world!",
        reference_audio="path/to/reference.wav",
        language="en-us"
    )
"""

# Modules that torch.hub verifies are importable before running any entry
# point in this file. Each entry must be the *import* name of the module, not
# the pip distribution name, because the check is an import lookup: hence
# 'yaml' (installed by PyYAML) and 'einops_exts' (installed by einops-exts).
dependencies = [
    'torch',
    'torchaudio',
    'transformers',
    'librosa',
    'phonemizer',
    'nltk',
    'yaml',
    'munch',
    'einops',
    'einops_exts',
    'numpy',
    'scipy',
    'huggingface_hub',
]


def chiluka(pretrained: bool = True, device: str = None, **kwargs):
    """
    Default hub entry point: build the Hindi-English Chiluka TTS model.

    Args:
        pretrained: When True, the model is obtained through
            ``Chiluka.from_pretrained(model="hindi_english", ...)``;
            when False, ``Chiluka(...)`` is constructed directly.
        device: Forwarded unchanged as ``device=`` (e.g. 'cuda' or 'cpu').
            ``None`` is passed through as-is; device selection in that case
            is left to the ``chiluka`` package.
        **kwargs: Extra keyword arguments forwarded to whichever of
            ``Chiluka.from_pretrained`` / ``Chiluka`` is called.

    Returns:
        The object returned by ``Chiluka.from_pretrained`` or ``Chiluka``.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka')
        >>> wav = tts.synthesize("Hello!", "reference.wav", language="en-us")
    """
    # Imported lazily so the package is only required when the entry point runs.
    from chiluka import Chiluka

    if not pretrained:
        return Chiluka(device=device, **kwargs)

    return Chiluka.from_pretrained(model="hindi_english", device=device, **kwargs)


def chiluka_telugu(pretrained: bool = True, device: str = None, **kwargs):
    """
    Hub entry point: build the Telugu Chiluka TTS model.

    Args:
        pretrained: When True, the model is obtained through
            ``Chiluka.from_pretrained(model="telugu", ...)``;
            when False, ``Chiluka(...)`` is constructed directly.
        device: Forwarded unchanged as ``device=`` (e.g. 'cuda' or 'cpu').
            ``None`` is passed through as-is; device selection in that case
            is left to the ``chiluka`` package.
        **kwargs: Extra keyword arguments forwarded to whichever of
            ``Chiluka.from_pretrained`` / ``Chiluka`` is called.

    Returns:
        The object returned by ``Chiluka.from_pretrained`` or ``Chiluka``.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_telugu')
        >>> wav = tts.synthesize("నమస్కారం", "reference.wav", language="te")
    """
    # Imported lazily so the package is only required when the entry point runs.
    from chiluka import Chiluka

    if not pretrained:
        return Chiluka(device=device, **kwargs)

    return Chiluka.from_pretrained(model="telugu", device=device, **kwargs)


def chiluka_hindi_english(pretrained: bool = True, device: str = None, **kwargs):
    """
    Explicitly named alias for the Hindi-English entry point.

    Delegates to `chiluka()` with the same arguments and returns its result.

    Args:
        pretrained: Forwarded to `chiluka()` as ``pretrained=``.
        device: Forwarded to `chiluka()` as ``device=``.
        **kwargs: Extra keyword arguments forwarded to `chiluka()`.

    Returns:
        Whatever `chiluka()` returns for these arguments.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_hindi_english')
    """
    tts_model = chiluka(pretrained=pretrained, device=device, **kwargs)
    return tts_model


def chiluka_from_hf(repo_id: str = "Seemanth/chiluka", model: str = "hindi_english", device: str = None, **kwargs):
    """
    Hub entry point: load Chiluka TTS from a chosen HuggingFace Hub repository.

    Args:
        repo_id: HuggingFace Hub repository ID, forwarded as ``repo_id=``.
        model: Model variant name, forwarded as ``model=``
            ('hindi_english' or 'telugu').
        device: Forwarded unchanged as ``device=`` (e.g. 'cuda' or 'cpu').
            ``None`` is passed through as-is; device selection in that case
            is left to the ``chiluka`` package.
        **kwargs: Extra keyword arguments forwarded to
            ``Chiluka.from_pretrained``.

    Returns:
        The object returned by ``Chiluka.from_pretrained``.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_from_hf',
        ...                       repo_id='myuser/my-custom-chiluka')
    """
    # Imported lazily so the package is only required when the entry point runs.
    from chiluka import Chiluka

    load_pretrained = Chiluka.from_pretrained
    return load_pretrained(
        repo_id=repo_id,
        model=model,
        device=device,
        **kwargs,
    )