# Source: Hugging Face Space upload by BettyHsu (commit 2376c39).
# -*- coding: utf-8 -*-
from Chat import Chat
import gradio as gr
import speech_recognition as sr
from scipy.io.wavfile import write, read
from gtts import gTTS
from pydub import AudioSegment
import openai
import requests
import os
# OpenAI API key (used for Whisper transcription and the chat), read from the env.
API_KEY = os.environ.get("API_KEY")
openai.api_key = API_KEY
# NOTE(review): this Recognizer is never used below — candidate for removal.
r = sr.Recognizer()
# Project-local chat wrapper (presumably around the OpenAI chat API — see Chat.py).
chat = Chat(api=API_KEY)
# Scratch file paths: the recorded input, and the synthesized reply (MP3 + WAV).
INPUT_VOICE_FILE_NAME = "./voices/voice_from_gradio.wav"
OUTPUT_VOICE_FILE_NAME_MP3 = "./voices/voice_output.mp3"
OUTPUT_VOICE_FILE_NAME = "./voices/voice_output.wav"
# Base URL of the backend service that converts Taiwanese <-> English speech.
BACKEND_URL = os.environ.get("BACKEND_URL")
# =====================================================================
def speech2text(file_name):
    '''
    Transcribe the audio file at *file_name* using OpenAI Whisper.

    file_name: path to an audio file accepted by the Whisper API.
    Returns the transcribed text as a string.
    '''
    # Context manager so the file handle is closed even if the API call raises
    # (the original left the file open forever).
    with open(file_name, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']
# =====================================================================
# Send the message to ChatGPT and synthesize the returned text as speech.
def conversation_to_teacher(student_says):
    '''
    Send the student's message to ChatGPT, synthesize the reply to speech,
    and return the reply text.

    student_says: transcribed text of the student's voice input.
    Returns the reply text with leading newlines removed.
    Side effects: writes the synthesized reply to OUTPUT_VOICE_FILE_NAME_MP3
    and (converted to WAV) OUTPUT_VOICE_FILE_NAME.
    '''
    teacher_says = chat.send_message(student_says)
    # Bug fix: the original tested `teacher_says[:2] == "\n"`, comparing a
    # two-character slice against a single newline — it never matched replies
    # that start with "\n\n". Strip all leading newlines, and do it BEFORE
    # synthesis so gTTS does not receive them either.
    teacher_says = teacher_says.lstrip("\n")
    tts = gTTS(text=teacher_says, lang='zh-TW')
    tts.save(OUTPUT_VOICE_FILE_NAME_MP3)
    # gTTS only produces MP3; convert to WAV for the downstream consumers.
    sound = AudioSegment.from_mp3(OUTPUT_VOICE_FILE_NAME_MP3)
    sound.export(OUTPUT_VOICE_FILE_NAME, format="wav")
    return teacher_says
# =====================================================================
def submit(voice, language):
    '''
    Handle one round of conversation from the Gradio UI.

    voice: (sample_rate, audio_array) tuple from the gr.Audio microphone input.
    language: "中文", "English" or "臺語" from the Dropdown.
    Returns [student's transcribed text, teacher's reply text,
             (rate, data) tuple of the reply audio].
    '''
    rate, scaled = voice
    # Persist the recorded array as a WAV file for the downstream services.
    write(INPUT_VOICE_FILE_NAME, rate, scaled)
    if language == "臺語":
        # Taiwanese speech -> English speech via the backend service.
        url_tw2en = f'{BACKEND_URL}/api/voice/tw2en'
        # `with` closes the handle before we rewrite the same path below
        # (the original leaked it, and held it open during the rewrite).
        with open(INPUT_VOICE_FILE_NAME, "rb") as audio_in:
            values = {"file": ("", audio_in, "audio/wav")}
            response = requests.post(url_tw2en, files=values)
        with open(INPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)
    # Speech -> text.
    student_says = speech2text(INPUT_VOICE_FILE_NAME)
    # Text -> ChatGPT reply (also synthesizes the reply audio files).
    teacher_says = conversation_to_teacher(student_says)
    # For Taiwanese the synthesized reply is English speech; convert it back.
    if language == "臺語":
        url_en2tw = f'{BACKEND_URL}/api/voice/en2tw'
        with open(OUTPUT_VOICE_FILE_NAME, "rb") as audio_out:
            values = {"file": ("", audio_out, "audio/wav")}
            response = requests.post(url_en2tw, files=values)
        with open(OUTPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)
    return [student_says, teacher_says, read(OUTPUT_VOICE_FILE_NAME)]
# =====================================================================
# Gradio UI: record voice -> pick language -> submit -> transcript, reply, audio.
with gr.Blocks() as demo:
    audio_input = gr.Audio(source="microphone")
    gr.Examples(
        [
            ["./voices/我會說中文.mp3"],
            ["./voices/我會說英文.mp3"],
            ["./voices/臺語coffee.wav"],
        ],
        audio_input,
    )
    # Bug fix: the original had a trailing comma after gr.Dropdown(...), which
    # made `dropdown` a 1-tuple and forced `dropdown[0]` in the click() wiring.
    dropdown = gr.Dropdown(
        ["中文", "English", "臺語"],
        label="Language",
    )
    btn_submit = gr.Button(
        value="Submit",
        variant="primary",
    )
    # Output components.
    text_student = gr.Textbox(label="Content of Your voice")
    text_teacher = gr.Textbox(label="Reply from ChatGPT")
    audio_teacher = gr.Audio(label="ChatGPT voice")
    btn_submit.click(
        submit,
        inputs=[audio_input, dropdown],
        outputs=[text_student, text_teacher, audio_teacher]
    )
demo.launch()