Seemanth
/

chiluka

Model card Files Files and versions

chiluka / examples /torchhub_example.py

seemanthraju

Added streaming function

393129e 6 days ago

history blame contribute delete

2.41 kB

	"""
	Chiluka TTS - PyTorch Hub Example

	Load the model using torch.hub.load() - the chiluka package itself does
	not need to be pip-installed, just PyTorch and access to the GitHub repo.

	Requirements:
	pip install torch torchaudio
	sudo apt-get install espeak-ng

	Usage:
	python torchhub_example.py --reference path/to/reference.wav
	python torchhub_example.py --reference ref.wav --variant telugu --language te
	"""

	import argparse
	import torch


	def main(argv=None):
	    """Synthesize speech with a Chiluka model loaded through ``torch.hub``.

	    Parses the command line, fills in a variant-specific sample text and
	    language code when they are not supplied, loads the torch.hub entry
	    point for the chosen variant, synthesizes the text using the reference
	    audio, saves the result to the output path and prints its duration.

	    Args:
	        argv: Optional list of command-line arguments (without the program
	            name). ``None`` (the default) makes argparse read ``sys.argv``,
	            so calling ``main()`` with no arguments behaves as before.

	    Raises:
	        SystemExit: With status 2 when the arguments are invalid or when
	            ``--reference`` does not point to an existing file.
	    """
	    # Function-scope import: the file's import header does not bring in pathlib.
	    from pathlib import Path

	    # variant -> (torch.hub entry point, fallback text, fallback language code).
	    # Variants without their own entry here ("default", "hindi_english") share
	    # the 'chiluka' entry point and the English fallbacks.
	    shared_settings = (
	        "chiluka",
	        "Hello, I am Chiluka, a text to speech system.",
	        "en-us",
	    )
	    variant_settings = {
	        "telugu": (
	            "chiluka_telugu",
	            "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను",
	            "te",
	        ),
	    }

	    parser = argparse.ArgumentParser(description="Chiluka TTS - PyTorch Hub Example")
	    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
	    parser.add_argument(
	        "--variant",
	        type=str,
	        default="default",
	        choices=["default", "telugu", "hindi_english"],
	        help="Model variant (default, telugu, hindi_english)",
	    )
	    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
	    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
	    parser.add_argument("--output", type=str, default="output_torchhub.wav", help="Output wav file path")
	    args = parser.parse_args(argv)

	    # Fail fast: check the reference file before loading the model, so a
	    # mistyped path is reported immediately instead of after the load.
	    if not Path(args.reference).is_file():
	        parser.error(f"reference audio file not found: {args.reference}")

	    # Set defaults
	    entry_point, fallback_text, fallback_language = variant_settings.get(
	        args.variant, shared_settings
	    )
	    if args.text is None:
	        args.text = fallback_text
	    if args.language is None:
	        args.language = fallback_language

	    # Load via torch.hub
	    # Available entry points:
	    # 'chiluka' - Hindi-English model (default)
	    # 'chiluka_telugu' - Telugu model
	    # 'chiluka_hindi_english' - Hindi-English model (explicit)
	    print(f"Loading model via torch.hub (variant: {args.variant})...")
	    tts = torch.hub.load('Seemanth/chiluka', entry_point)

	    # Synthesize
	    print(f"Synthesizing: '{args.text}'")
	    print(f"Language: {args.language}")
	    wav = tts.synthesize(
	        text=args.text,
	        reference_audio=args.reference,
	        language=args.language,
	    )

	    # Save
	    tts.save_wav(wav, args.output)
	    # NOTE(review): 24000 is presumably the model's output sample rate in Hz,
	    # and len(wav) the number of samples - confirm against the model API.
	    print(f"Duration: {len(wav) / 24000:.2f} seconds")


	# Script entry point: run the example only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()