Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
cbe6c2e 96feb61 cbe6c2e 76e4769 cbe6c2e f925369 cbe6c2e 29cc7eb 6bc60d4 cbe6c2e fa5008e d60fb9a cbe6c2e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | import openai
from playsound import playsound
from gtts import gTTS
import speech_recognition as sr
import gradio as gr
import os
# Configure the OpenAI client from the `api_key` environment variable.
# os.environ[...] raises KeyError here, at import time, if it is not set.
openai.api_key = os.environ['api_key']
def generate_response(prompt):
    """Ask the chat model for a reply to ``prompt``.

    ``prompt`` is formatted to text and sent as the only message, with the
    ``user`` role, to the ``gpt-3.5-turbo`` chat completion endpoint.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    user_text = f"{prompt}"
    user_turn = {"role": "user", "content": user_text}
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[user_turn],
    )
    first_choice = completion.choices[0]
    return first_choice['message']['content']
# Module-level speech recognizer instance; transcribe() uses it to read and
# recognize the recorded audio.
r = sr.Recognizer()
from pydub import AudioSegment
def transcribe(audio, lang):
    """Turn a recorded question into a spoken answer.

    The recording is transcribed with Google speech recognition, the
    transcript is passed to ``generate_response``, and the reply text is
    synthesised to speech with gTTS.

    Args:
        audio: Path of an audio file that ``sr.AudioFile`` can open.
        lang: Language code, used for both recognition and synthesis.

    Returns:
        Path of the MP3 file containing the spoken reply.

    Exceptions raised by the recognizer, the chat request or gTTS are not
    caught here and propagate to the caller.
    """
    import tempfile

    with sr.AudioFile(audio) as source:
        recording = r.record(source)
    heard = r.recognize_google(recording, language=lang)
    reply = generate_response(heard)
    speech = gTTS(text=reply, lang=lang)

    # Write each reply to its own file. A single fixed name would be shared by
    # every request, so overlapping calls could overwrite one another's audio
    # or hand back a file that is still being written.
    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid for the caller; it is not removed here.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        out = handle.name
    speech.save(out)
    return out
# Article text passed to the interface, read from gradio_article.md.
with open("gradio_article.md") as f:
    article = f.read()

# Keyword arguments forwarded unchanged to gr.Interface below.
interface_options = dict(
    title="Smart GPT",
    description="Let's have a chat! Talk to me, and I'll respond in a jiffy!",
    article=article,
    layout="horizontal",
    theme="default",
)

# Microphone recording is handed to transcribe() as a file path.
inputs = gr.Audio(source="microphone", type="filepath")
outputs = "audio"

# Language selector; its value becomes the `lang` argument of transcribe().
lang = gr.Dropdown(choices=["en", "vi", "nl"], value="en", resettable=False)
if lang.value == "":
    lang.value = "en"

# Build the interface, then start serving it.
demo = gr.Interface(
    fn=transcribe,
    inputs=[inputs, lang],
    outputs=outputs,
    live=False,
    allow_clear=False,
    **interface_options,
)
demo.launch()
# TODO: custom voice. Options to look into:
#   - VALL-E
#   - https://cloud.google.com/text-to-speech/custom-voice/docs/quickstart
#   - Mozilla TTS
#   - OpenSeq2Seq
#   - Best for VN: Vbee, FPT
#   - ElevenLabs for English