chiluka / examples /pip_example.py
seemanthraju
Added streaming function
393129e
"""
Chiluka TTS - pip install Example
After installing via pip, model weights auto-download from HuggingFace
on first use and are cached locally.
Install:
    pip install chiluka
    sudo apt-get install espeak-ng

Usage:
    python pip_example.py --reference path/to/reference.wav
    python pip_example.py --reference ref.wav --model telugu --language te
"""
import argparse
import os
# Sample rate (Hz) used to turn a sample count into a duration. This is the
# value the example has always hard-coded.
# NOTE(review): confirm it matches the rate of the audio Chiluka returns.
_SAMPLE_RATE_HZ = 24000

# Per-model fallbacks used when --text / --language are not given:
# model name -> (demo sentence, language code). The keys also drive the
# accepted --model choices so the two can never drift apart.
_MODEL_DEFAULTS = {
    "hindi_english": ("Hello, I am Chiluka, a text to speech system.", "en-us"),
    "telugu": ("నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను", "te"),
}


def _tagged_output_path(output, tag):
    """Return *output* with *tag* inserted just before its file extension.

    A plain ``output.replace(".wav", ...)`` leaves the path unchanged when it
    has no literal ``.wav`` (so a second clip would overwrite the first) and
    rewrites ``.wav`` inside directory names. Splitting off only the final
    extension avoids both problems.
    """
    stem, extension = os.path.splitext(output)
    return f"{stem}{tag}{extension}"


def _report_duration(wav):
    """Print the clip length in seconds, assuming ``_SAMPLE_RATE_HZ`` samples per second."""
    print(f"Duration: {len(wav) / _SAMPLE_RATE_HZ:.2f} seconds")


def main(argv=None):
    """Run the Chiluka pip-install demo from the command line.

    Parses the options, lists the available models, loads the requested one,
    synthesizes the text with the given reference audio and saves the result.
    For the ``hindi_english`` model an extra Hindi clip is written next to
    the main output, with ``_hindi`` inserted before the extension.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behavior.

    Raises:
        SystemExit: On ``--help``, on invalid arguments, or when the
            reference audio file does not exist.
    """
    parser = argparse.ArgumentParser(description="Chiluka TTS - pip Example")
    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
    parser.add_argument(
        "--model",
        type=str,
        default="hindi_english",
        choices=list(_MODEL_DEFAULTS),
        help="Model variant (default: hindi_english)",
    )
    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
    parser.add_argument("--output", type=str, default="output_pip.wav", help="Output wav file path")
    args = parser.parse_args(argv)

    # Fail fast: a bad reference path should be reported before the model
    # is loaded (which may download weights on first run).
    if not os.path.isfile(args.reference):
        parser.error(f"reference audio file not found: {args.reference}")

    # Import after argparse so --help is fast
    from chiluka import Chiluka, list_models

    # Fill in per-model defaults for anything the user left unspecified.
    default_text, default_language = _MODEL_DEFAULTS[args.model]
    text = default_text if args.text is None else args.text
    language = default_language if args.language is None else args.language

    # List models
    print("Available models:")
    for name, info in list_models().items():
        print(f" {name}: {info['description']}")
    print()

    # Load model (auto-downloads weights on first run)
    print(f"Loading '{args.model}' model...")
    tts = Chiluka.from_pretrained(model=args.model)

    # Synthesize speech
    print(f"Text: '{text}'")
    print(f"Language: {language}")
    print(f"Reference: {args.reference}")
    print()
    wav = tts.synthesize(
        text=text,
        reference_audio=args.reference,
        language=language,
        alpha=0.3,
        beta=0.7,
        diffusion_steps=5,
        embedding_scale=1.0,
    )

    # Save output
    tts.save_wav(wav, args.output)
    _report_duration(wav)

    # --- Bonus: synthesize in another language with same model ---
    if args.model == "hindi_english":
        print("\n--- Bonus: Hindi synthesis with same model ---")
        hindi_wav = tts.synthesize(
            text="नमस्ते, मैं चिलुका बोल रहा हूं",
            reference_audio=args.reference,
            language="hi",
        )
        # Always a distinct file, even when --output has no ".wav" suffix.
        hindi_output = _tagged_output_path(args.output, "_hindi")
        tts.save_wav(hindi_wav, hindi_output)
        _report_duration(hindi_wav)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()