Spaces:

markllego
/

synthesizer

Runtime error

App Files Files Community

markllego commited on Nov 7, 2023

Commit

524b895

1 Parent(s): 66dc160

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -11

app.py CHANGED Viewed

@@ -9,9 +9,10 @@ os.environ['OPENAI_API_KEY'] = api_key
 openai_client = OpenAI()
 def synthesize_speech(input_text, selected_model, selected_voice, audio_format):
-    # You would need to check the OpenAI documentation for the correct parameter to set the audio format
-    # and update the following line accordingly.
     audio_response = openai_client.audio.speech.create(
         model=selected_model,
         voice=selected_voice,
@@ -20,7 +21,8 @@ def synthesize_speech(input_text, selected_model, selected_voice, audio_format):
     )
     # Determine the file extension based on the selected audio format
-    file_extension = f".{audio_format}" if audio_format in ['mp3', 'aac', 'flac'] else ".opus"
     # Save the synthesized speech to a temporary audio file
     with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as audio_temp:
@@ -29,20 +31,27 @@ def synthesize_speech(input_text, selected_model, selected_voice, audio_format):
     return audio_file_path
 # Define the Gradio interface
 with grd.Blocks() as speech_synthesizer_interface:
     grd.Markdown("# <center> Text-to-Speech Synthesizer </center>")
     with grd.Row():
-        model_selector = grd.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Choose Model', value='tts-1')
-        voice_selector = grd.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='Select Voice', value='alloy')
-        format_selector = grd.Dropdown(choices=['mp3', 'opus', 'aac', 'flac'], label='Select Format', value='mp3')
-    input_field = grd.Textbox(label="Enter your text here", placeholder="Type here and convert to speech.")
     synthesis_button = grd.Button("Convert to Speech")
     audio_result = grd.Audio(label="Generated Speech")
-    input_field.submit(fn=synthesize_speech, inputs=[input_field, model_selector, voice_selector, format_selector], outputs=audio_result)
-    synthesis_button.click(fn=synthesize_speech, inputs=[input_field, model_selector, voice_selector, format_selector], outputs=audio_result)
 # Launch the interface
-speech_synthesizer_interface.launch()

 openai_client = OpenAI()
 def synthesize_speech(input_text, selected_model, selected_voice, audio_format):
+    # This is a new feature from OpenAI, so please check the documentation for the correct parameter to set the audio format.
+    # See: https://platform.openai.com/docs/guides/text-to-speech
     audio_response = openai_client.audio.speech.create(
         model=selected_model,
         voice=selected_voice,
     )
     # Determine the file extension based on the selected audio format
+    file_extension = f".{audio_format}" if audio_format in [
+        'mp3', 'aac', 'flac'] else ".opus"
     # Save the synthesized speech to a temporary audio file
     with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as audio_temp:
     return audio_file_path
 # Define the Gradio interface
 with grd.Blocks() as speech_synthesizer_interface:
     grd.Markdown("# <center> Text-to-Speech Synthesizer </center>")
     with grd.Row():
+        model_selector = grd.Dropdown(
+            choices=['tts-1', 'tts-1-hd'], label='Choose Model', value='tts-1')
+        voice_selector = grd.Dropdown(choices=[
+                                      'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='Select Voice', value='alloy')
+        format_selector = grd.Dropdown(
+            choices=['mp3', 'opus', 'aac', 'flac'], label='Select Format', value='mp3')
+    input_field = grd.Textbox(
+        label="Enter your text here", placeholder="Type here and convert to speech.")
     synthesis_button = grd.Button("Convert to Speech")
     audio_result = grd.Audio(label="Generated Speech")
+    input_field.submit(fn=synthesize_speech, inputs=[
+                       input_field, model_selector, voice_selector, format_selector], outputs=audio_result)
+    synthesis_button.click(fn=synthesize_speech, inputs=[
+                           input_field, model_selector, voice_selector, format_selector], outputs=audio_result)
 # Launch the interface
+speech_synthesizer_interface.launch()