Kevin676 commited on
Commit
2531969
·
1 Parent(s): 7cd4494

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -19
app.py CHANGED
@@ -1,26 +1,37 @@
 
 
 
1
  import whisper
 
 
 
 
2
 
3
  model = whisper.load_model("base")
4
 
 
 
 
5
 
6
- import gradio as gr
7
- import time
8
-
9
- import openai
10
- openai.api_key = "sk-REDACTED"  # SECURITY: a live API key was committed here in plaintext — it is permanently in git history and must be rotated/revoked. 填写您的OpenAI API key (fill in your OpenAI API key)
11
 
12
- messages = [
13
- #{"role": "system", "content": "You are a TOEFL examiner who will ask me questions in a TOEFL test."}
14
- #{"role": "system", "content": "You are a therapist. Please cure people mentally"}
15
- #{"role": "system", "content": "You are a comedian. Please say something funny and make people laugh."}
16
  {"role": "system", "content": "You are my personal assistant. Your name is Alice."}
17
  ]
18
 
19
  res = []
20
 
21
- def transcribe(audio):
 
 
 
 
 
 
22
 
23
- #time.sleep(3)
24
  # load audio and pad/trim it to fit 30 seconds
25
  audio = whisper.load_audio(audio)
26
  audio = whisper.pad_or_trim(audio)
@@ -36,7 +47,13 @@ def transcribe(audio):
36
  options = whisper.DecodingOptions()
37
  result = whisper.decode(model, mel, options)
38
  res.append(result.text)
39
- result.text
 
 
 
 
 
 
40
 
41
  # chatgpt
42
  n = len(res)
@@ -50,25 +67,35 @@ def transcribe(audio):
50
 
51
  chat_response = completion.choices[0].message.content
52
 
53
- messages.append({"role": "assistant", "content": chat_response})
54
-
55
 
 
 
 
 
56
 
 
 
57
 
58
- return [result.text, chat_response]
59
 
 
60
 
61
  output_1 = gr.Textbox(label="Speech to Text")
62
  output_2 = gr.Textbox(label="ChatGPT Output")
63
-
64
 
65
  gr.Interface(
66
  title = 'TalktoAI，随时随地，谈天说地！',
 
67
  fn=transcribe,
68
  inputs=[
69
- gr.inputs.Audio(source="microphone", type="filepath")
 
 
 
70
  ],
71
  outputs=[
72
- output_1, output_2
73
  ],
74
- live=True).launch()
 
1
+ import os
2
+ os.system("pip install git+https://github.com/openai/whisper.git")
3
+ import gradio as gr
4
  import whisper
5
+ import time
6
+ import openai
7
+ from gtts import gTTS
8
+ from IPython.display import Audio
9
 
10
  model = whisper.load_model("base")
11
 
12
+ mes1 = [
13
+ {"role": "system", "content": "You are a TOEFL examiner who will ask me questions in a TOEFL test."}
14
+ ]
15
 
16
+ mes2 = [
17
+ {"role": "system", "content": "You are a mental health therapist. Your name is Tina."}
18
+ ]
 
 
19
 
20
+ mes3 = [
 
 
 
21
  {"role": "system", "content": "You are my personal assistant. Your name is Alice."}
22
  ]
23
 
24
  res = []
25
 
26
+ question = []
27
+
28
+ def transcribe(apikey, audio, choice1, choice2):
29
+
30
+ #global messages
31
+
32
+ openai.api_key = apikey
33
 
34
+ # time.sleep(3)
35
  # load audio and pad/trim it to fit 30 seconds
36
  audio = whisper.load_audio(audio)
37
  audio = whisper.pad_or_trim(audio)
 
47
  options = whisper.DecodingOptions()
48
  result = whisper.decode(model, mel, options)
49
  res.append(result.text)
50
+
51
+ if choice1 == "TOEFL":
52
+ messages = mes1
53
+ elif choice1 == "Therapist":
54
+ messages = mes2
55
+ else:
56
+ messages = mes3
57
 
58
  # chatgpt
59
  n = len(res)
 
67
 
68
  chat_response = completion.choices[0].message.content
69
 
70
+ messages.append({"role": "assistant", "content": chat_response})
 
71
 
72
+ if choice2 == "Chinese":
73
+ tts = gTTS(chat_response, lang='zh-CN')
74
+ elif choice2 == "English":
75
+ tts = gTTS(chat_response, lang='en', tld='us')
76
 
77
+ tts.save('1.wav')
78
+ sound_file = '1.wav'
79
 
80
+ question.append(chat_response)
81
 
82
+ return [result.text, chat_response, sound_file, res, question]
83
 
84
  output_1 = gr.Textbox(label="Speech to Text")
85
  output_2 = gr.Textbox(label="ChatGPT Output")
86
+ output_3 = gr.Audio(label="Audio")
87
 
88
  gr.Interface(
89
  title = 'TalktoAI，随时随地，谈天说地！',
90
+ description = "让具有人文关怀的AI造福每一个人！科技向善，AI璀璨！",
91
  fn=transcribe,
92
  inputs=[
93
+ gr.Textbox(lines=1, label = "请填写您的OpenAI_API_key"),
94
+ gr.inputs.Audio(source="microphone", type="filepath"),
95
+ gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
96
+ gr.Radio(["Chinese", "English"], label="Chinese or English?")
97
  ],
98
  outputs=[
99
+ output_1, output_2, output_3
100
  ],
101
+ ).launch()