Spaces:

YashChowdhary
/

Text_To_Speech

Sleeping

App Files Files Community

Text_To_Speech / examples.py

YashChowdhary

Upload 7 files

7f7498c verified about 2 months ago

raw

history blame contribute delete

7.45 kB

	"""
	Kokoro TTS - Example Usage Script
	=================================
	This script demonstrates how to use the Kokoro TTS engine programmatically.
	Useful for understanding the code flow and for batch processing.

	Run this script after installing dependencies:
	pip install kokoro soundfile numpy
	apt-get install espeak-ng # Linux
	"""

	import numpy as np
	import soundfile as sf
	from kokoro import KPipeline

	# ============================================================================
	# EXAMPLE 1: Basic Text-to-Speech
	# ============================================================================

	def example_basic_tts():
	    """Synthesize one sentence with default settings, one file per segment.

	    Creates an American English pipeline, prints the text and phonemes of
	    every segment the pipeline yields, and writes each segment to
	    ``example1_segment<i>.wav`` with a 24000 Hz sample rate.
	    """
	    rule = "=" * 50
	    print("\n" + rule)
	    print("Example 1: Basic TTS")
	    print(rule)

	    # Pipeline for American English
	    tts = KPipeline(lang_code='a')

	    # Sentence to be spoken
	    sentence = "Hello! This is a demonstration of the Kokoro text to speech model."

	    # Walk through the generated segments
	    for index, result in enumerate(tts(sentence, voice='af_heart')):
	        chunk_text, chunk_phonemes, waveform = result
	        print(f"Segment {index}:")
	        print(f" Text: {chunk_text}")
	        print(f" Phonemes: {chunk_phonemes}")

	        # Objects exposing .numpy() are converted; anything else is
	        # handed to soundfile unchanged.
	        if hasattr(waveform, 'numpy'):
	            samples = waveform.numpy()
	        else:
	            samples = waveform

	        out_path = f'example1_segment{index}.wav'
	        sf.write(out_path, samples, 24000)
	        print(f" Saved: {out_path}")


	# ============================================================================
	# EXAMPLE 2: Speed Control
	# ============================================================================

	def example_speed_control():
	    """Generate the same sentence at several speaking speeds.

	    One ``example2_speed_<speed>.wav`` file is written per speed. All
	    segments the pipeline yields for a given speed are joined before
	    saving, so the file holds the whole utterance rather than only the
	    last segment when the pipeline yields more than one.
	    """
	    print("\n" + "="*50)
	    print("Example 2: Speed Control")
	    print("="*50)

	    pipeline = KPipeline(lang_code='a')
	    text = "The quick brown fox jumps over the lazy dog."

	    sample_rate = 24000
	    speeds = [0.7, 1.0, 1.3]

	    for speed in speeds:
	        print(f"\nGenerating at speed {speed}x...")

	        # Gather every segment first; writing inside the loop would
	        # overwrite the same file once per segment.
	        segments = [
	            audio.numpy() if hasattr(audio, 'numpy') else audio
	            for _, _, audio in pipeline(text, voice='af_bella', speed=speed)
	        ]
	        if not segments:
	            # Nothing was synthesized, so there is nothing to save.
	            continue

	        filename = f'example2_speed_{speed}.wav'
	        sf.write(filename, np.concatenate(segments), sample_rate)
	        print(f" Saved: {filename}")


	# ============================================================================
	# EXAMPLE 3: Different Voices
	# ============================================================================

	def example_different_voices():
	    """Speak the same text with several American and British voices.

	    The first character of each voice id ('a' or 'b') selects which of the
	    two pipelines is used. One ``example3_<voice_id>.wav`` file is written
	    per voice; all segments yielded for a voice are joined before saving
	    so the file is not overwritten once per segment.
	    """
	    print("\n" + "="*50)
	    print("Example 3: Different Voices")
	    print("="*50)

	    # American and British pipelines
	    pipelines = {
	        'a': KPipeline(lang_code='a'),
	        'b': KPipeline(lang_code='b'),
	    }

	    text = "Good morning! How are you doing today?"
	    sample_rate = 24000

	    voices = [
	        ('af_heart', 'American Female - Heart'),
	        ('am_michael', 'American Male - Michael'),
	        ('bf_emma', 'British Female - Emma'),
	        ('bm_george', 'British Male - George'),
	    ]

	    for voice_id, voice_name in voices:
	        print(f"\nGenerating with {voice_name}...")

	        lang_code = voice_id[0]  # 'a' or 'b'
	        pipeline = pipelines[lang_code]

	        # Collect all segments, then write once per voice.
	        segments = [
	            audio.numpy() if hasattr(audio, 'numpy') else audio
	            for _, _, audio in pipeline(text, voice=voice_id)
	        ]
	        if not segments:
	            # Nothing was synthesized for this voice; skip the write.
	            continue

	        filename = f'example3_{voice_id}.wav'
	        sf.write(filename, np.concatenate(segments), sample_rate)
	        print(f" Saved: {filename}")


	# ============================================================================
	# EXAMPLE 4: Combining Audio Segments with Pauses
	# ============================================================================

	def example_pause_insertion():
	    """Join synthesized segments with half a second of silence between them.

	    Synthesizes a three-line text, places a 0.5 s block of zeros between
	    consecutive segments (none after the last), scales the result so its
	    peak absolute value is 0.9, and writes ``example4_with_pauses.wav``.
	    """
	    rule = "=" * 50
	    print("\n" + rule)
	    print("Example 4: Pause Insertion")
	    print(rule)

	    tts = KPipeline(lang_code='a')

	    # Several sentences, one per line
	    script = """First sentence of the story.
	Second sentence with more details.
	And finally, the conclusion."""

	    # Gather every generated segment as an array
	    clips = [
	        wave.numpy() if hasattr(wave, 'numpy') else wave
	        for _, _, wave in tts(script, voice='af_heart')
	    ]

	    # 500 ms of silence at the output sample rate
	    rate = 24000
	    gap_seconds = 0.5
	    gap = np.zeros(int(rate * gap_seconds), dtype=np.float32)

	    # Interleave: a gap goes before every clip except the first
	    pieces = []
	    for clip in clips:
	        if pieces:
	            pieces.append(gap)
	        pieces.append(clip)

	    mixed = np.concatenate(pieces)

	    # Peak-normalize to 0.9 (skipped for all-zero audio)
	    peak = np.abs(mixed).max()
	    if peak > 0:
	        mixed = mixed / peak * 0.9

	    out_path = 'example4_with_pauses.wav'
	    sf.write(out_path, mixed, rate)
	    print(f" Saved: {out_path}")
	    print(f" Duration: {len(mixed)/rate:.2f} seconds")


	# ============================================================================
	# EXAMPLE 5: Custom Pronunciation
	# ============================================================================

	def example_custom_pronunciation():
	    """Compare default pronunciation with a phoneme-markup override.

	    The same sentence is synthesized twice: once as plain text and once
	    with the ``[word](/phonemes/)`` markup applied to "Kokoro". The
	    phonemes of every segment are printed, and each variant is saved to
	    its own file (``example5_normal.wav`` / ``example5_custom.wav``).
	    Segments are joined before saving so a variant that yields several
	    segments is not reduced to its last one.
	    """
	    print("\n" + "="*50)
	    print("Example 5: Custom Pronunciation")
	    print("="*50)

	    pipeline = KPipeline(lang_code='a')
	    sample_rate = 24000

	    # Custom pronunciation using markdown-style markup
	    # [word](/phonemes/) syntax
	    variants = [
	        ("Normal pronunciation:",
	         "I love Kokoro text to speech.",
	         'example5_normal.wav'),
	        ("Custom pronunciation:",
	         "I love [Kokoro](/kˈOkəɹO/) text to speech.",
	         'example5_custom.wav'),
	    ]

	    for heading, text, filename in variants:
	        print(f"\n{heading}")

	        segments = []
	        for _, phonemes, audio in pipeline(text, voice='af_heart'):
	            print(f" Phonemes: {phonemes}")
	            segments.append(audio.numpy() if hasattr(audio, 'numpy') else audio)

	        # Write once per variant; skip if nothing was synthesized.
	        if segments:
	            sf.write(filename, np.concatenate(segments), sample_rate)


	# ============================================================================
	# MAIN
	# ============================================================================

	# Script entry point: run every example in order. Any failure is reported
	# and the process exits with status 1 so callers (shells, CI) can detect it.
	if __name__ == "__main__":
	    print("Kokoro TTS - Example Usage")
	    print("==========================")
	    print("This script generates several example audio files.")
	    print("Make sure you have installed: pip install kokoro soundfile numpy")
	    print("And system dependency: apt-get install espeak-ng")

	    try:
	        example_basic_tts()
	        example_speed_control()
	        example_different_voices()
	        example_pause_insertion()
	        example_custom_pronunciation()
	    except ImportError as e:
	        # An import failed while the examples were running.
	        print(f"\nError: {e}")
	        print("Please install required packages:")
	        print(" pip install kokoro soundfile numpy")
	        print(" apt-get install espeak-ng")
	        raise SystemExit(1)
	    except Exception as e:
	        # Top-level boundary: report the failure and signal it via exit code.
	        print(f"\nError during generation: {e}")
	        raise SystemExit(1)
	    else:
	        print("\n" + "="*50)
	        print("All examples completed successfully!")
	        print("Check the current directory for generated .wav files")
	        print("="*50)