Spaces:
Runtime error
Runtime error
| import os | |
| import warnings | |
| import whisper | |
| import gradio as gr | |
| import openai | |
| from gtts import gTTS | |
# The Whisper checkpoint is loaded once at import time so that individual
# requests do not pay the model-loading cost.
model = whisper.load_model(name="base")

# Device the model's weights live on; input tensors are moved here before decoding.
device = model.device
def call_openai_api(text):
    """Send ``text`` as a prompt to ``gpt-3.5-turbo-instruct`` and return the reply.

    The API key is read from the ``OPENAI_API_KEY`` environment variable on
    every call.

    Both generations of the ``openai`` package are supported: releases from
    1.0 onward removed ``openai.Completion`` in favour of a client object, so
    the client is used when the package exposes it and the legacy module-level
    call is used otherwise.

    Args:
        text: Prompt string forwarded to the completion endpoint.

    Returns:
        The text of the first completion choice.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    request = {
        "model": "gpt-3.5-turbo-instruct",
        "prompt": text,
        "max_tokens": 500,
        "temperature": 0,  # deterministic output
    }

    # openai>=1.0: calling openai.Completion.create raises, use the client API.
    if hasattr(openai, "OpenAI"):
        client = openai.OpenAI(api_key=api_key)
        return client.completions.create(**request).choices[0].text

    # openai<1.0: legacy module-level interface.
    openai.api_key = api_key
    result = openai.Completion.create(**request)
    return result["choices"][0]["text"]
def transcribe(audio):
    """Transcribe a recording, get an OpenAI reply, and voice that reply.

    Args:
        audio: Path to the recorded audio file (the Gradio ``Audio`` input is
            configured with ``type="filepath"``). Presumably ``None`` when
            the user submits without recording anything.

    Returns:
        A three-item list ``[transcript, reply, mp3_path]`` matching the
        interface's outputs. ``mp3_path`` is ``None`` when the reply is blank
        and there is nothing to synthesize.

    Raises:
        gr.Error: If no audio was supplied.
    """
    import tempfile

    # Fail with a user-visible message instead of crashing inside whisper.
    if not audio:
        raise gr.Error("No audio received. Please record a message and try again.")

    # Load the audio and pad/trim it to fit 30 seconds.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Make the log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(device)

    # whisper.decode detects the spoken language itself when no language is
    # set in the options, so a separate detect_language pass (whose result
    # was discarded) is not needed.
    options = whisper.DecodingOptions(fp16=False)
    result_text = whisper.decode(model, mel, options).text

    # Call the OpenAI API for a response.
    out_result = call_openai_api(result_text)

    # gTTS refuses to synthesize empty text; return the text results alone.
    if not out_result or not out_result.strip():
        return [result_text, out_result, None]

    # Write each reply to its own file so concurrent requests do not
    # overwrite one another's audio. The file is deliberately left on disk
    # for the caller to serve; cleanup is not handled here.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as speech_file:
        gTTS(out_result).write_to_fp(speech_file)

    return [result_text, out_result, speech_file.name]
# Output widgets, listed in the same order as the values returned by transcribe().
output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="ChatGPT Output")
output_3 = gr.Audio(autoplay=True, type="filepath")

# Microphone recording in; transcript, model reply and spoken reply out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", sources=["microphone"]),
    outputs=[output_1, output_2, output_3],
    title='Voice to Text & Voice reply using OpenAI (KF)',
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()