"""
Kokoro TTS - Example Usage Script
=================================

This script demonstrates how to use the Kokoro TTS engine programmatically.
Useful for understanding the code flow and for batch processing.

Run this script after installing dependencies:
    pip install kokoro soundfile numpy
    apt-get install espeak-ng  # Linux
"""
import numpy as np
import soundfile as sf

from kokoro import KPipeline
# ============================================================================
# EXAMPLE 1: Basic Text-to-Speech
# ============================================================================
def example_basic_tts():
    """Generate speech with default settings.

    Runs the American-English pipeline over a short sentence and writes one
    24 kHz WAV file per generated segment.
    """
    banner = "=" * 50
    print("\n" + banner)
    print("Example 1: Basic TTS")
    print(banner)

    # Initialize pipeline for American English ('a')
    tts = KPipeline(lang_code='a')
    text = "Hello! This is a demonstration of the Kokoro text to speech model."

    # The pipeline yields (graphemes, phonemes, audio) per segment.
    for idx, (graphemes, phonemes, audio) in enumerate(tts(text, voice='af_heart')):
        print(f"Segment {idx}:")
        print(f" Text: {graphemes}")
        print(f" Phonemes: {phonemes}")
        # Torch tensors expose .numpy(); plain arrays pass through unchanged.
        samples = audio.numpy() if hasattr(audio, 'numpy') else audio
        sf.write(f'example1_segment{idx}.wav', samples, 24000)
        print(f" Saved: example1_segment{idx}.wav")
# ============================================================================
# EXAMPLE 2: Speed Control
# ============================================================================
def example_speed_control():
    """Generate speech at different speeds.

    Synthesizes the same sentence at 0.7x, 1.0x, and 1.3x speed and writes
    one 24 kHz WAV file per speed.
    """
    print("\n" + "="*50)
    print("Example 2: Speed Control")
    print("="*50)

    pipeline = KPipeline(lang_code='a')
    text = "The quick brown fox jumps over the lazy dog."
    speeds = [0.7, 1.0, 1.3]

    for speed in speeds:
        print(f"\nGenerating at speed {speed}x...")
        for _, _, audio in pipeline(text, voice='af_bella', speed=speed):
            # Torch tensors expose .numpy(); plain arrays pass through.
            audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio
            filename = f'example2_speed_{speed}.wav'
            sf.write(filename, audio_np, 24000)
            # Fix: report the actual output filename; the original printed a
            # literal placeholder and left `filename` unused in the message.
            print(f" Saved: {filename}")
# ============================================================================
# EXAMPLE 3: Different Voices
# ============================================================================
def example_different_voices():
    """Compare different voices with the same text.

    Generates the same sentence with two American and two British voices,
    routing each voice to the pipeline matching its language prefix, and
    writes one 24 kHz WAV file per voice.
    """
    print("\n" + "="*50)
    print("Example 3: Different Voices")
    print("="*50)

    # One pipeline per language; voice IDs are prefixed 'a' (American) or
    # 'b' (British), so the prefix selects the pipeline.
    pipelines = {
        'a': KPipeline(lang_code='a'),
        'b': KPipeline(lang_code='b'),
    }

    text = "Good morning! How are you doing today?"
    voices = [
        ('af_heart', 'American Female - Heart'),
        ('am_michael', 'American Male - Michael'),
        ('bf_emma', 'British Female - Emma'),
        ('bm_george', 'British Male - George'),
    ]

    for voice_id, voice_name in voices:
        print(f"\nGenerating with {voice_name}...")
        lang_code = voice_id[0]  # 'a' or 'b'
        pipeline = pipelines[lang_code]
        for _, _, audio in pipeline(text, voice=voice_id):
            audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio
            filename = f'example3_{voice_id}.wav'
            sf.write(filename, audio_np, 24000)
            # Fix: report the actual output filename; the original printed a
            # literal placeholder and left `filename` unused in the message.
            print(f" Saved: {filename}")
# ============================================================================
# EXAMPLE 4: Combining Audio Segments with Pauses
# ============================================================================
def example_pause_insertion():
    """Demonstrate inserting pauses between sentences.

    Synthesizes a multi-sentence text, joins the resulting segments with
    500 ms of silence between them (none after the last), peak-normalizes
    the result to 0.9, and writes a single 24 kHz WAV file.
    """
    print("\n" + "="*50)
    print("Example 4: Pause Insertion")
    print("="*50)

    pipeline = KPipeline(lang_code='a')

    # Multiple sentences
    text = """First sentence of the story.
Second sentence with more details.
And finally, the conclusion."""

    # Collect all audio segments
    audio_segments = []
    for _, _, audio in pipeline(text, voice='af_heart'):
        audio_np = audio.numpy() if hasattr(audio, 'numpy') else audio
        audio_segments.append(audio_np)

    # Create pause (500ms of silence)
    sample_rate = 24000
    pause_duration = 0.5  # seconds
    silence = np.zeros(int(sample_rate * pause_duration), dtype=np.float32)

    # Combine with pauses
    combined = []
    for i, segment in enumerate(audio_segments):
        combined.append(segment)
        if i < len(audio_segments) - 1:  # Don't add pause after last segment
            combined.append(silence)

    final_audio = np.concatenate(combined)

    # Normalize peak amplitude to 0.9 (guard against an all-silent signal)
    max_val = np.max(np.abs(final_audio))
    if max_val > 0:
        final_audio = final_audio / max_val * 0.9

    filename = 'example4_with_pauses.wav'
    sf.write(filename, final_audio, 24000)
    # Fix: report the actual output filename; the original printed a literal
    # placeholder and left `filename` unused in the message.
    print(f" Saved: {filename}")
    print(f" Duration: {len(final_audio)/24000:.2f} seconds")
# ============================================================================
# EXAMPLE 5: Custom Pronunciation
# ============================================================================
def example_custom_pronunciation():
    """Use phoneme markup for custom pronunciations.

    Synthesizes the same sentence twice — once with default G2P and once
    with an inline [word](/phonemes/) override — and writes each to its own
    24 kHz WAV file so the phoneme streams can be compared.
    """
    print("\n" + "="*50)
    print("Example 5: Custom Pronunciation")
    print("="*50)

    pipeline = KPipeline(lang_code='a')

    # Custom pronunciation uses markdown-style [word](/phonemes/) markup.
    cases = [
        ("\nNormal pronunciation:",
         "I love Kokoro text to speech.",
         'example5_normal.wav'),
        ("\nCustom pronunciation:",
         "I love [Kokoro](/kˈOkəɹO/) text to speech.",
         'example5_custom.wav'),
    ]

    for header, text, outfile in cases:
        print(header)
        for _, phonemes, audio in pipeline(text, voice='af_heart'):
            print(f" Phonemes: {phonemes}")
            samples = audio.numpy() if hasattr(audio, 'numpy') else audio
            sf.write(outfile, samples, 24000)
# ============================================================================
# MAIN
# ============================================================================
def _run_all_examples():
    """Run every example in order; each writes .wav files to the cwd."""
    example_basic_tts()
    example_speed_control()
    example_different_voices()
    example_pause_insertion()
    example_custom_pronunciation()


if __name__ == "__main__":
    print("Kokoro TTS - Example Usage")
    print("==========================")
    print("This script generates several example audio files.")
    print("Make sure you have installed: pip install kokoro soundfile")
    print("And system dependency: apt-get install espeak-ng")
    try:
        _run_all_examples()
    except ImportError as e:
        # Most likely cause: kokoro/soundfile/numpy not installed.
        print(f"\nError: {e}")
        print("Please install required packages:")
        print(" pip install kokoro soundfile numpy")
        print(" apt-get install espeak-ng")
    except Exception as e:
        # Top-level boundary for a demo script: report and exit cleanly.
        print(f"\nError during generation: {e}")
    else:
        print("\n" + "="*50)
        print("All examples completed successfully!")
        print("Check the current directory for generated .wav files")
        print("="*50)