Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| import io | |
| import os | |
| from openai import OpenAI | |
| from pydub import AudioSegment | |
| from pydub.playback import play | |
# The OpenAI API key is taken from the OPENAI_API_KEY environment variable.
# Fail early with a clear message when it is missing: writing a missing value
# (None) back into os.environ would raise an opaque TypeError instead.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable is not set; "
        "it is required to call the OpenAI text-to-speech API."
    )

# Shared client used by the request handler(s) in this module.
client = OpenAI(api_key=_api_key)
def stream_and_yield_audio(text, model, voice):
    """Synthesize ``text`` with the OpenAI speech API and yield it for Gradio.

    The complete response body is downloaded and decoded as MP3 before
    anything is yielded, so the generator produces exactly one item.

    Args:
        text: The text to convert to speech.
        model: Name of the TTS model, e.g. ``"tts-1"``.
        voice: Name of the voice, e.g. ``"alloy"``.

    Yields:
        A ``(sample_rate, samples)`` tuple. ``samples`` is an ``int16`` NumPy
        array: one-dimensional for mono audio, or shaped
        ``(frames, channels)`` for multi-channel audio.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject empty input up front instead of sending a request that cannot
    # produce any speech.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    response = client.audio.speech.create(
        model=model,  # "tts-1", for example
        voice=voice,  # "alloy", for example
        input=text,
    )

    # Decode the binary MP3 payload from an in-memory byte stream.
    audio = AudioSegment.from_file(io.BytesIO(response.content), format="mp3")

    # The samples are exposed as int16 below; normalise to 16-bit first so
    # that audio decoded at another sample width is converted, not wrapped.
    if audio.sample_width != 2:
        audio = audio.set_sample_width(2)

    samples = np.array(audio.get_array_of_samples(), dtype=np.int16)

    # pydub interleaves the channels in one flat array; give multi-channel
    # audio an explicit (frames, channels) shape so it is not read as mono.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels))

    yield audio.frame_rate, samples
# Gradio UI, written against the older Gradio 3.50.2 API.
# NOTE(review): the original indentation was lost; this layout assumes the Row
# holds only the two dropdowns and the remaining components sit below it.
# Confirm against the deployed app.
_MODEL_CHOICES = ['tts-1', 'tts-1-hd']
_VOICE_CHOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']

with gr.Blocks() as demo:
    # Model and voice selectors, side by side.
    with gr.Row():
        model = gr.Dropdown(
            label='Model',
            choices=_MODEL_CHOICES,
            value='tts-1',
        )
        voice = gr.Dropdown(
            label='Voice Options',
            choices=_VOICE_CHOICES,
            value='alloy',
        )

    text = gr.Textbox(label="Input text")
    btn = gr.Button("Greet")
    output_audio = gr.Audio(
        label="Speech Output",
        streaming=True,
        autoplay=True,
    )

    # Clicking the button runs the TTS handler; the same handler is exposed
    # through the API under the name "tts-stream".
    btn.click(
        fn=stream_and_yield_audio,
        inputs=[text, model, voice],
        outputs=output_audio,
        api_name="tts-stream",
    )

# Enable the request queue, then start the app.
queued_demo = demo.queue()
queued_demo.launch()