Kevin676 commited on
Commit
cbdeac7
·
1 Parent(s): dfe8894

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Setup: speech-to-text model, OpenAI credentials, and conversation state ---
import os
import time

import gradio as gr
import openai
import whisper
from gtts import gTTS
from IPython.display import Audio

# Load the Whisper "base" model once at startup; reused for every request.
model = whisper.load_model("base")

# SECURITY: never hard-code an API key in source — the original committed a
# live secret ("sk-..."), which must be considered compromised and revoked.
# Read the key from the environment instead.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Running transcript of everything the user has said (one entry per request).
res = []

# Running list of every assistant reply, mirrored back to the UI.
question = []

# Full chat history sent to the ChatGPT API on each turn. The original code
# appended to `messages` without ever defining it, which raised NameError on
# the very first call — define it here.
messages = []
def transcribe(audio):
    """Transcribe one utterance, get a ChatGPT reply, and synthesize speech.

    Parameters
    ----------
    audio : str
        Filesystem path to the recorded clip (Gradio ``type="filepath"``).

    Returns
    -------
    list
        ``[user_text, assistant_text, audio_path, res, question]`` — the
        transcription, ChatGPT's reply, the path of the synthesized reply
        audio, and the running user/assistant history lists.
    """
    # Load the clip and pad/trim it to Whisper's fixed 30-second window.
    clip = whisper.load_audio(audio)
    clip = whisper.pad_or_trim(clip)

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(clip).to(model.device)

    # Detect the spoken language (informational only; decoding handles it).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio to text and record it in the transcript history.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    res.append(result.text)

    # Ask ChatGPT, sending the full conversation history for context.
    # (The original indexed res[len(res) - 1], which is just result.text.)
    messages.append({"role": "user", "content": result.text})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    chat_response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": chat_response})

    # Synthesize the reply. gTTS always emits MP3 data, so save with a
    # matching .mp3 extension (the original wrote MP3 bytes into "1.wav",
    # mislabeling the container).
    tts = gTTS(chat_response, lang='en', tld='us')
    sound_file = '1.mp3'
    tts.save(sound_file)

    question.append(chat_response)

    return [result.text, chat_response, sound_file, res, question]
# --- Gradio UI: microphone in; (transcript, reply text, reply audio) out ---
transcript_box = gr.Textbox(label="Speech to Text")
reply_box = gr.Textbox(label="ChatGPT Output")
reply_audio = gr.Audio(label="Audio")

# Microphone recording is delivered to `transcribe` as a temp-file path.
mic_input = gr.inputs.Audio(source="microphone", type="filepath")

demo = gr.Interface(
    title='TalktoAI,随时随地,谈天说地!',
    fn=transcribe,
    inputs=[mic_input],
    outputs=[transcript_box, reply_box, reply_audio],
    live=True,
)
demo.launch(share=True)