Kevin676 committed on
Commit
f73265e
Β·
1 Parent(s): f770813

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dependencies (install these BEFORE running the script):
#   pip install git+https://github.com/openai/whisper.git
#   pip install openai gradio gTTS
# NOTE: the original notebook executed the installs inline as shell commands
# ("! pip install ..."), which is a syntax error in a plain .py file — they
# have been moved into this comment so the module is importable.
import os

import whisper

# Load the multilingual "base" Whisper model once at startup; transcribe()
# reuses it for every request.
model = whisper.load_model("base")
model.device  # notebook-style inspection left over from the original; harmless no-op

from gtts import gTTS
from IPython.display import Audio

import gradio as gr
import time

import openai
# SECURITY: the original file hard-coded an OpenAI API key here, committed to
# a public repository. Any such key must be treated as compromised — revoke it
# and provide a fresh one via the environment instead of embedding it in code.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")  # fill in your OpenAI API key via env

# Conversation history sent to the Chat Completions API on every turn.
# The single system message fixes the assistant's persona; transcribe()
# appends alternating user/assistant entries as the chat progresses.
messages = [
    # Alternative personas kept from the original for easy switching:
    # {"role": "system", "content": "You are a TOEFL examiner who will ask me questions in a TOEFL test."}
    # {"role": "system", "content": "You are a therapist. Please cure people mentally"}
    # {"role": "system", "content": "You are a comedian. Please say something funny and make people laugh."}
    {"role": "system", "content": "You are my personal assistant. Your name is Alice."}
]

res = []       # every Whisper transcription produced so far (most recent last)
question = []  # every ChatGPT reply produced so far (most recent last)
29
def transcribe(audio):
    """Handle one voice-chat turn.

    Transcribes the recorded audio with Whisper, sends the text (plus the
    running conversation history) to the ChatGPT API, synthesizes the reply
    with gTTS, and returns the values the Gradio interface displays.

    Parameters
    ----------
    audio : str
        Filepath of the microphone recording (Gradio ``type="filepath"``).

    Returns
    -------
    list
        ``[transcribed text, ChatGPT reply text, path of the synthesized
        reply audio]`` — exactly one value per declared Gradio output.
    """
    # Load the recording and pad/trim it to Whisper's 30-second window.
    speech = whisper.load_audio(audio)
    speech = whisper.pad_or_trim(speech)

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(speech).to(model.device)

    # Detect the spoken language (logged for diagnostics only).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio to text and remember it in the session history.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    res.append(result.text)

    # Ask ChatGPT, sending the whole conversation so far for context.
    # (The original computed res[len(res)-1]; res[-1] is the idiomatic form,
    # and the bare no-op expression `result.text` has been removed.)
    content = res[-1]
    messages.append({"role": "user", "content": content})

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    chat_response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": chat_response})

    # Synthesize the reply. NOTE: gTTS always writes MP3 data; the '.wav'
    # filename is kept from the original, and Gradio sniffs the real format.
    tts = gTTS(chat_response, lang='en', tld='us')
    # tts = gTTS(chat_response, lang='zh-CN')  # Chinese variant (from original)
    tts.save('1.wav')
    sound_file = '1.wav'

    question.append(chat_response)

    # The Interface declares exactly three outputs, so return exactly three
    # values. The original returned five ([..., res, question]), which Gradio
    # cannot map onto three output components.
    return [result.text, chat_response, sound_file]
72
+
73
+
74
# Gradio UI wiring: one microphone input feeding transcribe(), three outputs
# (the Whisper transcript, the ChatGPT reply, and the synthesized reply audio).
transcript_box = gr.Textbox(label="Speech to Text")
reply_box = gr.Textbox(label="ChatGPT Output")
reply_audio = gr.Audio(label="Audio")

demo = gr.Interface(
    title='TalktoAIοΌŒιšζ—Άιšεœ°οΌŒθ°ˆε€©θ―΄εœ°οΌ',
    fn=transcribe,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath")],
    outputs=[transcript_box, reply_box, reply_audio],
    live=True,
)
# Publicly-shared live demo, exactly as the original launched it.
demo.launch(share=True)