""" Chiluka TTS - HuggingFace Hub Example Load model weights directly from HuggingFace Hub. No need to clone the repository or download weights manually. Requirements: pip install chiluka sudo apt-get install espeak-ng Usage: python huggingface_example.py --reference path/to/reference.wav python huggingface_example.py --reference ref.wav --model telugu --language te --text "నమస్కారం" """ import argparse from chiluka import Chiluka, list_models def main(): parser = argparse.ArgumentParser(description="Chiluka TTS - HuggingFace Hub Example") parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file") parser.add_argument("--model", type=str, default="hindi_english", choices=["hindi_english", "telugu"], help="Model variant to use (default: hindi_english)") parser.add_argument("--text", type=str, default=None, help="Text to synthesize") parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)") parser.add_argument("--output", type=str, default="output_hf.wav", help="Output wav file path") parser.add_argument("--device", type=str, default=None, help="Device: cuda or cpu") args = parser.parse_args() # Show available models print("Available models:") for name, info in list_models().items(): marker = " <--" if name == args.model else "" print(f" {name}: {info['description']}{marker}") print() # Set defaults based on model choice if args.text is None: if args.model == "telugu": args.text = "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను" else: args.text = "Hello, I am Chiluka, a text to speech system." if args.language is None: if args.model == "telugu": args.language = "te" else: args.language = "en-us" # Load model from HuggingFace Hub (auto-downloads on first use) print(f"Loading '{args.model}' model from HuggingFace Hub...") tts = Chiluka.from_pretrained(model=args.model, device=args.device) # Synthesize print(f"Synthesizing: '{args.text}'") print(f"Language: {args.language}") wav = tts.synthesize( text=args.text, reference_audio=args.reference, language=args.language, ) # Save tts.save_wav(wav, args.output) print(f"Duration: {len(wav) / 24000:.2f} seconds") if __name__ == "__main__": main()