""" Kokoro TTS - Example Usage Script ================================= This script demonstrates how to use the Kokoro TTS engine programmatically. Useful for understanding the code flow and for batch processing. Run this script after installing dependencies: pip install kokoro soundfile numpy apt-get install espeak-ng # Linux """ import numpy as np import soundfile as sf from kokoro import KPipeline # ============================================================================ # EXAMPLE 1: Basic Text-to-Speech # ============================================================================ def example_basic_tts(): """Generate speech with default settings.""" print("\n" + "="*50) print("Example 1: Basic TTS") print("="*50) # Initialize pipeline for American English pipeline = KPipeline(lang_code='a') # Text to synthesize text = "Hello! This is a demonstration of the Kokoro text to speech model." # Generate audio for i, (graphemes, phonemes, audio) in enumerate(pipeline(text, voice='af_heart')): print(f"Segment {i}:") print(f" Text: {graphemes}") print(f" Phonemes: {phonemes}") # Save audio audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio sf.write(f'example1_segment{i}.wav', audio_np, 24000) print(f" Saved: example1_segment{i}.wav") # ============================================================================ # EXAMPLE 2: Speed Control # ============================================================================ def example_speed_control(): """Generate speech at different speeds.""" print("\n" + "="*50) print("Example 2: Speed Control") print("="*50) pipeline = KPipeline(lang_code='a') text = "The quick brown fox jumps over the lazy dog." speeds = [0.7, 1.0, 1.3] for speed in speeds: print(f"\nGenerating at speed {speed}x...") for _, _, audio in pipeline(text, voice='af_bella', speed=speed): audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio filename = f'example2_speed_{speed}.wav' sf.write(filename, audio_np, 24000) print(f" Saved: {filename}") # ============================================================================ # EXAMPLE 3: Different Voices # ============================================================================ def example_different_voices(): """Compare different voices with the same text.""" print("\n" + "="*50) print("Example 3: Different Voices") print("="*50) # American and British pipelines pipelines = { 'a': KPipeline(lang_code='a'), 'b': KPipeline(lang_code='b'), } text = "Good morning! How are you doing today?" voices = [ ('af_heart', 'American Female - Heart'), ('am_michael', 'American Male - Michael'), ('bf_emma', 'British Female - Emma'), ('bm_george', 'British Male - George'), ] for voice_id, voice_name in voices: print(f"\nGenerating with {voice_name}...") lang_code = voice_id[0] # 'a' or 'b' pipeline = pipelines[lang_code] for _, _, audio in pipeline(text, voice=voice_id): audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio filename = f'example3_{voice_id}.wav' sf.write(filename, audio_np, 24000) print(f" Saved: {filename}") # ============================================================================ # EXAMPLE 4: Combining Audio Segments with Pauses # ============================================================================ def example_pause_insertion(): """Demonstrate inserting pauses between sentences.""" print("\n" + "="*50) print("Example 4: Pause Insertion") print("="*50) pipeline = KPipeline(lang_code='a') # Multiple sentences text = """First sentence of the story. Second sentence with more details. And finally, the conclusion.""" # Collect all audio segments audio_segments = [] for _, _, audio in pipeline(text, voice='af_heart'): audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio audio_segments.append(audio_np) # Create pause (500ms of silence) sample_rate = 24000 pause_duration = 0.5 # seconds silence = np.zeros(int(sample_rate * pause_duration), dtype=np.float32) # Combine with pauses combined = [] for i, segment in enumerate(audio_segments): combined.append(segment) if i < len(audio_segments) - 1: # Don't add pause after last segment combined.append(silence) final_audio = np.concatenate(combined) # Normalize max_val = np.max(np.abs(final_audio)) if max_val > 0: final_audio = final_audio / max_val * 0.9 filename = 'example4_with_pauses.wav' sf.write(filename, final_audio, 24000) print(f" Saved: {filename}") print(f" Duration: {len(final_audio)/24000:.2f} seconds") # ============================================================================ # EXAMPLE 5: Custom Pronunciation # ============================================================================ def example_custom_pronunciation(): """Use phoneme markup for custom pronunciations.""" print("\n" + "="*50) print("Example 5: Custom Pronunciation") print("="*50) pipeline = KPipeline(lang_code='a') # Custom pronunciation using markdown-style markup # [word](/phonemes/) syntax text_normal = "I love Kokoro text to speech." text_custom = "I love [Kokoro](/kˈOkəɹO/) text to speech." print("\nNormal pronunciation:") for _, phonemes, audio in pipeline(text_normal, voice='af_heart'): print(f" Phonemes: {phonemes}") audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio sf.write('example5_normal.wav', audio_np, 24000) print("\nCustom pronunciation:") for _, phonemes, audio in pipeline(text_custom, voice='af_heart'): print(f" Phonemes: {phonemes}") audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio sf.write('example5_custom.wav', audio_np, 24000) # ============================================================================ # MAIN # ============================================================================ if __name__ == "__main__": print("Kokoro TTS - Example Usage") print("==========================") print("This script generates several example audio files.") print("Make sure you have installed: pip install kokoro soundfile") print("And system dependency: apt-get install espeak-ng") try: example_basic_tts() example_speed_control() example_different_voices() example_pause_insertion() example_custom_pronunciation() print("\n" + "="*50) print("All examples completed successfully!") print("Check the current directory for generated .wav files") print("="*50) except ImportError as e: print(f"\nError: {e}") print("Please install required packages:") print(" pip install kokoro soundfile numpy") print(" apt-get install espeak-ng") except Exception as e: print(f"\nError during generation: {e}")