# app for huggingface space
# convert text to voice
import os
import tempfile
from functools import lru_cache

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline

# Sample rate (Hz) written into the output WAV header.
SAMPLE_RATE = 24000

# Voice prefixes that map to language code 'a'; every other voice uses 'b'.
_LANG_A_PREFIXES = ("af_", "am_")

# List of available voices (abbreviated for brevity)
voices = [
    "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica", "af_kore",
    "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
    "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael",
    "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
    "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
]


@lru_cache(maxsize=None)
def _get_pipeline(lang_code):
    """Return the KPipeline for *lang_code*, constructing it on first use.

    Only two language codes are ever requested ('a' and 'b'), so the cache
    stays tiny while sparing every request a fresh pipeline construction.
    """
    return KPipeline(lang_code=lang_code)


def _read_uploaded_text(file):
    """Return the UTF-8 text content of an uploaded file.

    *file* may be an object exposing the path as ``.name`` or a plain path
    string; both forms are accepted.
    """
    path = getattr(file, "name", file)
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def generate_tts(text, file, voice, speed):
    """Synthesize speech for the given text and return a WAV file path.

    Args:
        text: Text pasted by the user; ignored when *file* is provided.
        file: Optional uploaded text file (file-like with ``.name`` or a
            path string). When present, its content replaces *text*.
        voice: Voice identifier; its prefix selects the language code.
        speed: Speech speed forwarded to the pipeline.

    Returns:
        Path to a temporary ``.wav`` file holding the concatenated audio,
        or ``None`` when there is no text or no audio was produced.
    """
    # Read text from file if uploaded, else use pasted text
    if file is not None:
        text = _read_uploaded_text(file)
    if not text or not text.strip():
        return None  # No input to process

    # Choose language code based on voice prefix
    lang_code = 'a' if voice.startswith(_LANG_A_PREFIXES) else 'b'
    pipeline = _get_pipeline(lang_code)

    # Generate audio for each paragraph and collect segments.
    # NOTE(review): None segments are skipped defensively so that
    # np.concatenate cannot fail on them - confirm whether the pipeline
    # can actually yield None audio.
    audio_segments = [
        audio
        for _, _, audio in pipeline(
            text, voice=voice, speed=speed, split_pattern=r'\n+'
        )
        if audio is not None
    ]
    if not audio_segments:
        return None

    # Concatenate all audio segments
    combined_audio = np.concatenate(audio_segments)

    # Reserve a temp file name, then close the handle before soundfile
    # reopens the path for writing; delete=False keeps the file on disk.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        audio_path = tmpfile.name
    sf.write(audio_path, combined_audio, SAMPLE_RATE, format='WAV')

    return audio_path  # Gradio will handle playback and download


demo = gr.Interface(
    fn=generate_tts,
    inputs=[
        gr.Textbox(label="Paste text here (ignored if file uploaded)", lines=5),
        gr.File(label="Or upload a .txt file"),
        gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    ],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Kokoro Text-to-Speech",
    description=(
        "Paste text or upload a .txt file, select a voice, and generate "
        "speech. You can play and download the generated audio."
    ),
)

if __name__ == "__main__":
    demo.launch(share=True)