Spaces:
Sleeping
Sleeping
from kokoro import KPipeline
import soundfile as sf
import numpy as np
import logging

# Module-level TTS pipeline shared by all calls in this file.
# lang_code "a" — presumably American English per Kokoro's convention; confirm
# against the Kokoro docs if other languages are added.
pipeline = KPipeline(lang_code="a")
try:
    # Prefer GPU inference when a CUDA device is available.
    pipeline = pipeline.to("cuda")
except Exception:
    # Was a bare `except:`, which would also swallow KeyboardInterrupt and
    # SystemExit; Exception is the widest catch that is still safe here.
    logging.warning("CUDA not available, using CPU")
def generate_audio(
    text,
    voice="af_heart",
    speed=1,
    save_segments=False,
    progress=None,
):
    """
    Generate audio from text using the Kokoro TTS pipeline.

    Args:
        text (str): Text to convert to speech.
        voice (str): Voice ID to use.
        speed (float): Speech speed multiplier.
        save_segments (bool): Whether to save each segment as ``segment_<i>.wav``.
        progress: Optional progress tracker exposing a
            ``tqdm(iterable, desc=...)`` method (e.g. a gradio ``Progress``
            object). When None, no progress bar is shown.

    Returns:
        numpy.ndarray: Combined audio data at 24kHz sample rate; empty array
        if the text produced no segments.
    """
    # Split on periods so each sentence becomes its own segment.
    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\.")
    # Materialize up front so the progress bar knows the total segment count.
    segments = list(generator)

    # Bug fix: the original called progress.tqdm unconditionally, raising
    # AttributeError whenever progress was left at its documented default (None).
    if progress is None:
        iterator = segments
    else:
        iterator = progress.tqdm(segments, desc="Generating audio")

    all_audio = []
    for i, (gs, ps, audio) in enumerate(iterator):
        # Lazy %-style args avoid formatting cost when INFO logging is disabled.
        logging.info("Processing segment")
        logging.info("Graphemes: %s", gs)
        logging.info("Phonemes: %s", ps)
        all_audio.append(audio)
        if save_segments:
            sf.write(f"segment_{i}.wav", audio, 24000)

    # Bug fix: np.concatenate raises ValueError on an empty list; return an
    # empty array instead so callers can handle "no speech" gracefully.
    if not all_audio:
        return np.array([], dtype=np.float32)
    return np.concatenate(all_audio)