"""Gradio demo that converts text to speech with the Azure Speech service."""

import logging
import tempfile

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

logger = logging.getLogger(__name__)

# Fallback audio files returned instead of synthesized speech.
first_alert = "first.wav"  # returned when any of the three inputs is empty
next_alert = "next.wav"  # returned when synthesis fails

# Voice used for every synthesis request.
VOICE_NAME = "en-US-JennyNeural"


def Txt_To_Speech(SpeechRegion, SpeechKey, text):
    """Synthesize *text* to a WAV file using Azure Speech.

    Args:
        SpeechRegion: Azure Speech resource region (for example ``"eastus"``).
        SpeechKey: Subscription key of the Azure Speech resource.
        text: Text to be spoken.

    Returns:
        Path of an audio file: the freshly synthesized WAV on success,
        ``first_alert`` when any argument is empty, or ``next_alert`` when
        synthesis fails or does not complete.
    """
    # Guard clause: all three fields are required (also covers None).
    if not (SpeechRegion and SpeechKey and text):
        return first_alert

    try:
        speech_config = speechsdk.SpeechConfig(
            subscription=SpeechKey, region=SpeechRegion
        )
        speech_config.speech_synthesis_voice_name = VOICE_NAME

        # audio_config=None keeps the audio in the result instead of playing
        # it on the server's sound device; the browser plays the returned
        # file, and headless hosts have no speaker to open anyway.
        synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=None
        )
        result = synthesizer.speak_text_async(text).get()

        # Bad credentials or a wrong region yield a canceled result rather
        # than an exception, so the outcome has to be checked explicitly.
        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.error("Speech synthesis did not complete: %s", result.reason)
            return next_alert

        stream = speechsdk.AudioDataStream(result=result)

        # Reserve a unique path, then close the handle before the SDK writes
        # to it: an open NamedTemporaryFile cannot be reopened on Windows.
        # delete=False because the caller still has to read the file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name
        stream.save_to_wav_file(wav_path)
        return wav_path
    except Exception:
        # Best-effort UI: report the failure in the log and fall back to the
        # alert clip instead of surfacing a traceback to the user.
        logger.exception("Speech synthesis failed")
        return next_alert


with gr.Blocks() as demo:
    gr.Markdown("# TTS Text To Speech using Azure Speech")
    text1 = gr.Textbox(
        label="Enter your Speech Region",
        placeholder="Speech Region",
        lines=1,
    )
    # The subscription key is the secret, so this is the field to mask.
    text2 = gr.Textbox(
        type="password",
        label="SpeechKey",
        placeholder="Enter your Speech Key",
    )
    text3 = gr.Textbox(label="Inputs", placeholder="Enter your Inputs")
    gr.Interface(
        Txt_To_Speech,
        [text1, text2, text3],
        outputs=gr.Audio(label="Speech Output"),
    )

demo.launch()