basit123796 commited on
Commit
58565da
·
1 Parent(s): 9e13b91

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Speech-to-speech assistant: Whisper ASR -> ChatGPT -> gTTS voice reply.
import whisper
import gradio as gr
import time
import warnings
import json
import openai
import os
from gtts import gTTS

warnings.filterwarnings("ignore")

# Load the OpenAI API key from a local JSON file (expects {"API_KEY": "..."}).
with open('GPT_SECRET_KEY.json') as f:
    data = json.load(f)
openai.api_key = data["API_KEY"]

# Load the base Whisper model once at startup; it runs on its default device.
model = whisper.load_model("base")

# Create a 10-second silent placeholder MP3 so the Gradio audio output
# component has a file to show before the first transcription runs.
# NOTE: the original used IPython "!" shell syntax, which is a SyntaxError
# in a plain .py file; os.system is the plain-Python equivalent.
os.system(
    "ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t 10 -q:a 9 "
    "-acodec libmp3lame Temp.mp3"
)
def chatgpt_api(input_text):
    """Send *input_text* to gpt-3.5-turbo and return the assistant's reply text."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
    ]
    # An empty transcription still yields a (system-prompt-only) completion.
    if input_text:
        conversation.append({"role": "user", "content": input_text})

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return completion.choices[0].message.content
+ def transcribe(audio):
31
+
32
+ language = 'en'
33
+
34
+ audio = whisper.load_audio(audio)
35
+ audio = whisper.pad_or_trim(audio)
36
+
37
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
38
+
39
+ _, probs = model.detect_language(mel)
40
+
41
+ options = whisper.DecodingOptions()
42
+ result = whisper.decode(model, mel, options)
43
+ result_text = result.text
44
+
45
+ out_result = chatgpt_api(result_text)
46
+
47
+ audioobj = gTTS(text = out_result,
48
+ lang = language,
49
+ slow = False)
50
+
51
+ audioobj.save("Temp.mp3")
52
+
53
+ return [result_text, out_result, "Temp.mp3"]
# Output widgets: transcription, ChatGPT reply text, and the spoken reply.
output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="ChatGPT Output")
output_3 = gr.Audio("Temp.mp3")

gr.Interface(
    title='OpenAI Whisper and ChatGPT ASR Gradio Web UI',
    fn=transcribe,
    # Use the top-level gr.Audio component, consistent with the gr.Textbox /
    # gr.Audio usage above; the gr.inputs.* namespace is deprecated.
    inputs=[
        gr.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        output_1, output_2, output_3
    ],
    live=True,
).launch()