Seemanth
/

chiluka

Model card Files Files and versions

chiluka / examples /huggingface_example.py

seemanthraju

Added streaming function

393129e 5 days ago

history blame contribute delete

2.55 kB

	"""
	Chiluka TTS - HuggingFace Hub Example

	Load model weights directly from HuggingFace Hub.
	No need to clone the repository or download weights manually.

	Requirements:
	    pip install chiluka
	    sudo apt-get install espeak-ng

	Usage:
	    python huggingface_example.py --reference path/to/reference.wav
	    python huggingface_example.py --reference ref.wav --model telugu --language te --text "నమస్కారం"
	"""

	import argparse
	from chiluka import Chiluka, list_models


	def main():
	    """Run the HuggingFace Hub synthesis demo from the command line.

	    Steps, in order:
	      1. Parse the CLI options (``--reference`` is mandatory).
	      2. Print every entry returned by ``list_models()``, flagging the
	         one selected with ``--model``.
	      3. Fill in ``--text`` and ``--language`` when they were omitted,
	         using Telugu defaults for the ``telugu`` model and English
	         defaults otherwise.
	      4. Load the model with ``Chiluka.from_pretrained`` and synthesize
	         the text against the reference audio.
	      5. Write the result with ``tts.save_wav`` and print its duration.
	    """
	    cli = argparse.ArgumentParser(description="Chiluka TTS - HuggingFace Hub Example")
	    cli.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
	    cli.add_argument(
	        "--model",
	        type=str,
	        default="hindi_english",
	        choices=["hindi_english", "telugu"],
	        help="Model variant to use (default: hindi_english)",
	    )
	    cli.add_argument("--text", type=str, default=None, help="Text to synthesize")
	    cli.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
	    cli.add_argument("--output", type=str, default="output_hf.wav", help="Output wav file path")
	    cli.add_argument("--device", type=str, default=None, help="Device: cuda or cpu")
	    args = cli.parse_args()

	    # List the known model variants and point at the one in use.
	    print("Available models:")
	    for name, info in list_models().items():
	        suffix = " <--" if name == args.model else ""
	        print(f" {name}: {info['description']}{suffix}")
	    print()

	    # Options left unset fall back to defaults that match the model.
	    use_telugu = args.model == "telugu"
	    if args.text is None:
	        args.text = (
	            "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను"
	            if use_telugu
	            else "Hello, I am Chiluka, a text to speech system."
	        )
	    if args.language is None:
	        args.language = "te" if use_telugu else "en-us"

	    # Fetch the weights from the HuggingFace Hub (auto-downloads on first use).
	    print(f"Loading '{args.model}' model from HuggingFace Hub...")
	    tts = Chiluka.from_pretrained(model=args.model, device=args.device)

	    # Generate speech in the voice of the reference recording.
	    print(f"Synthesizing: '{args.text}'")
	    print(f"Language: {args.language}")
	    audio = tts.synthesize(
	        text=args.text,
	        reference_audio=args.reference,
	        language=args.language,
	    )

	    # Persist the audio, then report its length.
	    tts.save_wav(audio, args.output)
	    # NOTE(review): the divisor assumes 24 kHz output -- confirm against
	    # the model's actual sample rate.
	    seconds = len(audio) / 24000
	    print(f"Duration: {seconds:.2f} seconds")


	# Only run the demo when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()