File size: 3,379 Bytes
"""
PyTorch Hub configuration for Chiluka TTS.

Usage:
    import torch

    # Load Hindi-English model (default)
    tts = torch.hub.load('Seemanth/chiluka', 'chiluka')

    # Load Telugu model
    tts = torch.hub.load('Seemanth/chiluka', 'chiluka_telugu')

    # Generate speech
    wav = tts.synthesize(
        text="Hello, world!",
        reference_audio="path/to/reference.wav",
        language="en-us"
    )
"""
# Modules that torch.hub verifies are importable before exposing the entry
# points below. torch.hub checks every entry with importlib.util.find_spec(),
# so each one must be an importable *module* name, not a PyPI distribution
# name; otherwise loading fails with "Missing dependencies" even when the
# package is installed.
dependencies = [
    'torch',
    'torchaudio',
    'transformers',
    'librosa',
    'phonemizer',
    'nltk',
    'yaml',  # module shipped by the PyYAML distribution
    'munch',
    'einops',
    'einops_exts',  # module shipped by the einops-exts distribution
    'numpy',
    'scipy',
    'huggingface_hub',
]
def chiluka(pretrained: bool = True, device: str = None, **kwargs):
    """
    Hub entry point for the default (Hindi-English) Chiluka TTS model.

    Args:
        pretrained: When True, the model is obtained through
            `Chiluka.from_pretrained(model="hindi_english", ...)`, which
            fetches pretrained weights from HuggingFace Hub. When False,
            `Chiluka` is constructed directly.
        device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
        **kwargs: Forwarded unchanged to `Chiluka.from_pretrained` (pretrained)
            or to the `Chiluka` constructor (not pretrained).

    Returns:
        Chiluka: Initialized TTS model ready for inference.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka')
        >>> wav = tts.synthesize("Hello!", "reference.wav", language="en-us")
    """
    # Imported lazily so that merely listing hub entry points does not
    # require the chiluka package to be importable.
    from chiluka import Chiluka

    if not pretrained:
        return Chiluka(device=device, **kwargs)

    return Chiluka.from_pretrained(model="hindi_english", device=device, **kwargs)
def chiluka_telugu(pretrained: bool = True, device: str = None, **kwargs):
    """
    Hub entry point for the Telugu Chiluka TTS model.

    Args:
        pretrained: When True, the model is obtained through
            `Chiluka.from_pretrained(model="telugu", ...)`, which fetches
            pretrained weights from HuggingFace Hub. When False, `Chiluka`
            is constructed directly.
        device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
        **kwargs: Forwarded unchanged to `Chiluka.from_pretrained` (pretrained)
            or to the `Chiluka` constructor (not pretrained).

    Returns:
        Chiluka: Initialized TTS model ready for inference.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_telugu')
        >>> wav = tts.synthesize("నమస్కారం", "reference.wav", language="te")
    """
    # Imported lazily so that merely listing hub entry points does not
    # require the chiluka package to be importable.
    from chiluka import Chiluka

    tts = (
        Chiluka.from_pretrained(model="telugu", device=device, **kwargs)
        if pretrained
        else Chiluka(device=device, **kwargs)
    )
    return tts
def chiluka_hindi_english(pretrained: bool = True, device: str = None, **kwargs):
    """
    Explicitly named alias for the Hindi-English Chiluka TTS entry point.

    Delegates to `chiluka()` with every argument passed through unchanged,
    so both entry points return the same thing.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_hindi_english')
    """
    # No logic of its own: the default entry point does all the work.
    tts = chiluka(pretrained=pretrained, device=device, **kwargs)
    return tts
def chiluka_from_hf(
    repo_id: str = "Seemanth/chiluka",
    model: str = "hindi_english",
    device: str = None,
    **kwargs,
):
    """
    Load Chiluka TTS from a chosen HuggingFace Hub repository.

    Args:
        repo_id: HuggingFace Hub repository ID.
        model: Model variant ('hindi_english' or 'telugu').
        device: Device to use ('cuda' or 'cpu'). Auto-detects if None.
        **kwargs: Forwarded unchanged to `Chiluka.from_pretrained`.

    Returns:
        Whatever `Chiluka.from_pretrained` returns for the given arguments.

    Example:
        >>> import torch
        >>> tts = torch.hub.load('Seemanth/chiluka', 'chiluka_from_hf',
        ...                      repo_id='myuser/my-custom-chiluka')
    """
    # Imported lazily so that merely listing hub entry points does not
    # require the chiluka package to be importable.
    from chiluka import Chiluka

    load_pretrained = Chiluka.from_pretrained
    return load_pretrained(repo_id=repo_id, model=model, device=device, **kwargs)
|