import logging
import tempfile

import azure.cognitiveservices.speech as speechsdk
import gradio as gr
# Fallback audio clips that Txt_To_Speech returns instead of synthesized speech.
# Both are relative file names.
first_alert: str = "first.wav"  # returned when a required input is missing
next_alert: str = "next.wav"  # returned when speech synthesis fails

def Txt_To_Speech(SpeechRegion, SpeechKey, text):
    """Synthesize *text* with Azure Speech and return the path to a WAV file.

    Args:
        SpeechRegion: Region of the Azure Speech resource.
        SpeechKey: Subscription key of the Azure Speech resource.
        text: Text to speak with the ``en-US-JennyNeural`` voice.

    Returns:
        The path of a newly created temporary ``.wav`` file containing the
        synthesized audio. ``first_alert`` is returned when any argument is
        missing or blank, and ``next_alert`` when synthesis is cancelled or
        raises. The temporary file is not deleted by this function.
    """
    log = logging.getLogger(__name__)

    # Surrounding whitespace (e.g. from a pasted key) would make the service
    # reject otherwise valid credentials, so strip it before use.
    region = (SpeechRegion or "").strip()
    key = (SpeechKey or "").strip()
    if not region or not key or not (text or "").strip():
        return first_alert

    try:
        speech_config = speechsdk.SpeechConfig(subscription=key, region=region)
        speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'

        # audio_config=None keeps the audio in the result instead of playing
        # it on the host machine's speaker, which is not the browser user's
        # device and may not exist on a headless server.
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=None
        )
        result = speech_synthesizer.speak_text_async(text).get()

        # Bad credentials or network errors are reported through the result's
        # reason rather than an exception; without this check a WAV file with
        # no audio would be returned.
        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            if result.reason == speechsdk.ResultReason.Canceled:
                details = result.cancellation_details
                log.error(
                    "Speech synthesis canceled: %s (%s)",
                    details.reason,
                    details.error_details,
                )
            else:
                log.error("Speech synthesis ended with reason %s", result.reason)
            return next_alert

        # Reserve a unique file name and close our handle before the SDK
        # writes to that path; an open handle can block the write on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name
        speechsdk.AudioDataStream(result=result).save_to_wav_file(wav_path)
        return wav_path
    except Exception:
        # UI boundary: log the failure and fall back to the error clip
        # instead of surfacing a traceback to the user.
        log.exception("Speech synthesis failed")
        return next_alert


# Web UI: three text inputs are passed to Txt_To_Speech and the file path it
# returns is played back through the audio output component.
with gr.Blocks() as demo:
    gr.Markdown("# TTS Text To Speech using Azure Speech")

    text1 = gr.Textbox(label="Enter your Speech Region", placeholder="Speech Region", lines=1)
    # The subscription key is the secret value, so it is the masked field.
    text2 = gr.Textbox(type='password', label="SpeechKey", placeholder="Enter your Speech Key")
    text3 = gr.Textbox(label="Inputs", placeholder="Enter your Inputs")

    gr.Interface(
        Txt_To_Speech,
        [
            text1, text2, text3
        ],
        outputs=gr.Audio(label="Speech Output")
    )


demo.launch()