|
|
""" |
|
|
Chiluka TTS - PyTorch Hub Example |
|
|
|
|
|
Load the model using torch.hub.load() - no pip install of Chiluka itself is needed,
just PyTorch and a GitHub repo.
|
|
|
|
|
Requirements: |
|
|
pip install torch torchaudio |
|
|
sudo apt-get install espeak-ng |
|
|
|
|
|
Usage: |
|
|
python torchhub_example.py --reference path/to/reference.wav |
|
|
python torchhub_example.py --reference ref.wav --variant telugu --language te |
|
|
""" |
|
|
|
|
|
import argparse |
|
|
import torch |
|
|
|
|
|
|
|
|
# Sample rate used to convert the sample count into seconds for the final
# report. NOTE(review): the script hard-codes 24000; confirm it matches the
# rate of the audio actually returned by the loaded model.
_SAMPLE_RATE_HZ = 24000

# Fallback text / language used when the user does not pass --text or
# --language. Variants missing from these tables use the "default" entry.
_DEFAULT_TEXT = {
    "telugu": "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను",
    "default": "Hello, I am Chiluka, a text to speech system.",
}
_DEFAULT_LANGUAGE = {
    "telugu": "te",
    "default": "en-us",
}

# torch.hub entry point per variant. Every variant other than "telugu"
# (including "hindi_english") loads the plain "chiluka" entry point.
# NOTE(review): confirm whether "hindi_english" should map to a dedicated
# entry point of its own.
_HUB_REPO = 'Seemanth/chiluka'
_HUB_ENTRY_POINT = {
    "telugu": 'chiluka_telugu',
    "default": 'chiluka',
}


def _parse_args(argv=None) -> argparse.Namespace:
    """Parse the command line.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed namespace with ``reference``, ``variant``, ``text``,
        ``language`` and ``output`` attributes.

    Raises:
        SystemExit: On invalid or missing arguments (raised by argparse).
    """
    parser = argparse.ArgumentParser(description="Chiluka TTS - PyTorch Hub Example")
    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
    parser.add_argument(
        "--variant",
        type=str,
        default="default",
        choices=["default", "telugu", "hindi_english"],
        help="Model variant (default, telugu, hindi_english)",
    )
    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
    parser.add_argument("--output", type=str, default="output_torchhub.wav", help="Output wav file path")
    return parser.parse_args(argv)


def _apply_variant_defaults(args: argparse.Namespace) -> argparse.Namespace:
    """Fill in ``args.text`` and ``args.language`` when they were not given.

    Explicit user values are never overwritten; only ``None`` is replaced.
    """
    if args.text is None:
        args.text = _DEFAULT_TEXT.get(args.variant, _DEFAULT_TEXT["default"])
    if args.language is None:
        args.language = _DEFAULT_LANGUAGE.get(args.variant, _DEFAULT_LANGUAGE["default"])
    return args


def main(argv=None) -> None:
    """Load a Chiluka model through torch.hub and synthesize one utterance.

    Args:
        argv: Optional list of command-line argument strings. When ``None``
            (the default, and what the ``__main__`` guard uses) the arguments
            are taken from ``sys.argv[1:]``.

    Raises:
        SystemExit: If the command line is invalid (raised by argparse
            before any model is loaded).
    """
    args = _apply_variant_defaults(_parse_args(argv))

    print(f"Loading model via torch.hub (variant: {args.variant})...")
    entry_point = _HUB_ENTRY_POINT.get(args.variant, _HUB_ENTRY_POINT["default"])
    tts = torch.hub.load(_HUB_REPO, entry_point)

    print(f"Synthesizing: '{args.text}'")
    print(f"Language: {args.language}")
    wav = tts.synthesize(
        text=args.text,
        reference_audio=args.reference,
        language=args.language,
    )

    tts.save_wav(wav, args.output)
    print(f"Duration: {len(wav) / _SAMPLE_RATE_HZ:.2f} seconds")
|
|
|
|
|
|
|
|
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|