Text_To_Speech / examples.py
YashChowdhary's picture
Upload 7 files
7f7498c verified
"""
Kokoro TTS - Example Usage Script
=================================
This script demonstrates how to use the Kokoro TTS engine programmatically.
Useful for understanding the code flow and for batch processing.
Run this script after installing dependencies:
pip install kokoro soundfile numpy
apt-get install espeak-ng # Linux
"""
import numpy as np
import soundfile as sf
from kokoro import KPipeline
# ============================================================================
# EXAMPLE 1: Basic Text-to-Speech
# ============================================================================
def example_basic_tts():
    """Synthesize one sentence and save every yielded segment separately.

    Creates a pipeline with ``lang_code='a'`` (American English), runs it on
    a fixed sentence with the ``af_heart`` voice, prints the text and
    phonemes of each segment, and writes each segment to
    ``example1_segment<i>.wav`` at 24000 Hz.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Example 1: Basic TTS")
    print(rule)

    # American English pipeline and the sentence to speak.
    tts = KPipeline(lang_code='a')
    sentence = "Hello! This is a demonstration of the Kokoro text to speech model."

    for idx, result in enumerate(tts(sentence, voice='af_heart')):
        chunk_text, chunk_phonemes, chunk_audio = result
        out_path = f'example1_segment{idx}.wav'

        print(f"Segment {idx}:")
        print(f" Text: {chunk_text}")
        print(f" Phonemes: {chunk_phonemes}")

        # The audio may be an object exposing .numpy() (e.g. a tensor) or
        # already array-like; soundfile gets the converted form when available.
        if hasattr(chunk_audio, 'numpy'):
            samples = chunk_audio.numpy()
        else:
            samples = chunk_audio

        sf.write(out_path, samples, 24000)
        print(f" Saved: {out_path}")
# ============================================================================
# EXAMPLE 2: Speed Control
# ============================================================================
def example_speed_control():
    """Generate the same sentence at several speaking speeds.

    For each speed in ``(0.7, 1.0, 1.3)`` the sentence is synthesized with
    the ``af_bella`` voice and written to ``example2_speed_<speed>.wav`` at
    24000 Hz.

    All segments yielded for one speed are concatenated before writing, so a
    multi-segment result is saved whole instead of each segment overwriting
    the previous one under the same filename.
    """
    print("\n" + "=" * 50)
    print("Example 2: Speed Control")
    print("=" * 50)

    pipeline = KPipeline(lang_code='a')
    text = "The quick brown fox jumps over the lazy dog."
    speeds = (0.7, 1.0, 1.3)
    sample_rate = 24000

    for speed in speeds:
        print(f"\nGenerating at speed {speed}x...")

        # Gather every segment first; writing inside the loop would keep
        # only the last segment because the filename depends on speed alone.
        segments = [
            audio.numpy() if hasattr(audio, 'numpy') else audio
            for _, _, audio in pipeline(text, voice='af_bella', speed=speed)
            if audio is not None
        ]
        if not segments:
            # np.concatenate raises on an empty list; skip instead.
            print(" No audio was generated; nothing saved.")
            continue

        filename = f'example2_speed_{speed}.wav'
        sf.write(filename, np.concatenate(segments), sample_rate)
        print(f" Saved: {filename}")
# ============================================================================
# EXAMPLE 3: Different Voices
# ============================================================================
def example_different_voices():
    """Render one sentence with several voices for comparison.

    Two pipelines are created, keyed by language code (``'a'`` American,
    ``'b'`` British). Each voice id selects its pipeline by its first
    character, and the result is written to ``example3_<voice_id>.wav`` at
    24000 Hz.

    All segments yielded for one voice are concatenated before writing, so a
    multi-segment result is saved whole instead of each segment overwriting
    the previous one under the same filename.
    """
    print("\n" + "=" * 50)
    print("Example 3: Different Voices")
    print("=" * 50)

    # American and British pipelines, built once and reused across voices.
    pipelines = {
        'a': KPipeline(lang_code='a'),
        'b': KPipeline(lang_code='b'),
    }
    text = "Good morning! How are you doing today?"
    voices = [
        ('af_heart', 'American Female - Heart'),
        ('am_michael', 'American Male - Michael'),
        ('bf_emma', 'British Female - Emma'),
        ('bm_george', 'British Male - George'),
    ]
    sample_rate = 24000

    for voice_id, voice_name in voices:
        print(f"\nGenerating with {voice_name}...")

        # The first character of the voice id ('a' or 'b') is the language
        # code of the pipeline that must synthesize it.
        pipeline = pipelines[voice_id[0]]

        # Gather every segment first; writing inside the loop would keep
        # only the last segment because the filename depends on voice alone.
        segments = [
            audio.numpy() if hasattr(audio, 'numpy') else audio
            for _, _, audio in pipeline(text, voice=voice_id)
            if audio is not None
        ]
        if not segments:
            # np.concatenate raises on an empty list; skip instead.
            print(" No audio was generated; nothing saved.")
            continue

        filename = f'example3_{voice_id}.wav'
        sf.write(filename, np.concatenate(segments), sample_rate)
        print(f" Saved: {filename}")
# ============================================================================
# EXAMPLE 4: Combining Audio Segments with Pauses
# ============================================================================
def example_pause_insertion():
    """Join synthesized segments with half-second pauses between them.

    A three-line text is synthesized with the ``af_heart`` voice. The
    yielded segments are joined with 0.5 s of silence between consecutive
    segments (none after the last), peak-normalized to 0.9, and written to
    ``example4_with_pauses.wav``.

    If the pipeline yields no audio, nothing is written; concatenating an
    empty list would otherwise raise ``ValueError``.
    """
    print("\n" + "=" * 50)
    print("Example 4: Pause Insertion")
    print("=" * 50)

    pipeline = KPipeline(lang_code='a')

    # Multiple sentences, one per line.
    text = """First sentence of the story.
Second sentence with more details.
And finally, the conclusion."""

    sample_rate = 24000
    pause_duration = 0.5  # seconds

    # Collect all audio segments as NumPy arrays.
    audio_segments = [
        audio.numpy() if hasattr(audio, 'numpy') else audio
        for _, _, audio in pipeline(text, voice='af_heart')
        if audio is not None
    ]
    if not audio_segments:
        print(" No audio was generated; nothing saved.")
        return

    # Silence inserted between segments (500 ms at the output sample rate).
    silence = np.zeros(int(sample_rate * pause_duration), dtype=np.float32)

    # Interleave: a pause goes before every segment except the first, which
    # is the same as "after every segment except the last".
    combined = []
    for segment in audio_segments:
        if combined:
            combined.append(silence)
        combined.append(segment)
    final_audio = np.concatenate(combined)

    # Peak-normalize to 0.9; skipped for all-zero audio to avoid dividing by 0.
    max_val = np.max(np.abs(final_audio))
    if max_val > 0:
        final_audio = final_audio / max_val * 0.9

    filename = 'example4_with_pauses.wav'
    sf.write(filename, final_audio, sample_rate)
    print(f" Saved: {filename}")
    print(f" Duration: {len(final_audio)/sample_rate:.2f} seconds")
# ============================================================================
# EXAMPLE 5: Custom Pronunciation
# ============================================================================
def example_custom_pronunciation():
    """Compare default pronunciation with an explicit phoneme override.

    The same sentence is synthesized twice with the ``af_heart`` voice: once
    as plain text and once with the markdown-style ``[word](/phonemes/)``
    markup. The phonemes of each yielded segment are printed, and the audio
    is written to ``example5_normal.wav`` and ``example5_custom.wav`` at
    24000 Hz.

    All segments of one variant are concatenated before writing, so a
    multi-segment result is saved whole instead of each segment overwriting
    the previous one under the same filename.
    """
    print("\n" + "=" * 50)
    print("Example 5: Custom Pronunciation")
    print("=" * 50)

    pipeline = KPipeline(lang_code='a')
    sample_rate = 24000

    # Custom pronunciation using markdown-style markup:
    # [word](/phonemes/) syntax
    text_normal = "I love Kokoro text to speech."
    text_custom = "I love [Kokoro](/kˈOkəɹO/) text to speech."

    variants = (
        ("\nNormal pronunciation:", text_normal, 'example5_normal.wav'),
        ("\nCustom pronunciation:", text_custom, 'example5_custom.wav'),
    )
    for heading, text, filename in variants:
        print(heading)
        segments = []
        for _, phonemes, audio in pipeline(text, voice='af_heart'):
            print(f" Phonemes: {phonemes}")
            if audio is None:
                continue
            segments.append(audio.numpy() if hasattr(audio, 'numpy') else audio)

        # Write once per variant; np.concatenate raises on an empty list,
        # so the write is skipped when no audio was produced.
        if segments:
            sf.write(filename, np.concatenate(segments), sample_rate)
# ============================================================================
# MAIN
# ============================================================================
# Script entry point: run every example in order and report the outcome.
# On failure the error is printed and the process exits with status 1, so
# shells and CI can detect that generation did not complete.
if __name__ == "__main__":
    print("Kokoro TTS - Example Usage")
    print("==========================")
    print("This script generates several example audio files.")
    # Same package list as the module docstring and the ImportError hint.
    print("Make sure you have installed: pip install kokoro soundfile numpy")
    print("And system dependency: apt-get install espeak-ng")
    try:
        example_basic_tts()
        example_speed_control()
        example_different_voices()
        example_pause_insertion()
        example_custom_pronunciation()
    except ImportError as e:
        # The top-of-file imports run before this point, so only an import
        # raised while the examples are running can be caught here.
        print(f"\nError: {e}")
        print("Please install required packages:")
        print(" pip install kokoro soundfile numpy")
        print(" apt-get install espeak-ng")
        raise SystemExit(1) from e
    except Exception as e:
        # Top-level boundary: report the error, then signal failure.
        print(f"\nError during generation: {e}")
        raise SystemExit(1) from e
    else:
        print("\n" + "=" * 50)
        print("All examples completed successfully!")
        print("Check the current directory for generated .wav files")
        print("=" * 50)