File size: 3,579 Bytes
dab89b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2376c39
 
dab89b6
2376c39
 
 
dab89b6
 
2376c39
dab89b6
2376c39
dab89b6
2376c39
 
 
 
 
 
 
 
 
dab89b6
 
 
 
 
 
 
 
 
2376c39
dab89b6
 
 
 
2376c39
dab89b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
from Chat import Chat
import gradio as gr
import speech_recognition as sr
from scipy.io.wavfile import write, read
from gtts import gTTS
from pydub import AudioSegment
import openai
import requests
import os
# OpenAI API key comes from the environment — never hard-coded.
API_KEY = os.environ.get("API_KEY")
openai.api_key = API_KEY
# speech_recognition Recognizer; not referenced elsewhere in this file's
# visible code — NOTE(review): possibly dead, confirm before removing.
r = sr.Recognizer()
# Conversation wrapper around ChatGPT (project-local Chat class).
chat = Chat(api=API_KEY)

# On-disk locations for the recorded input and the synthesized reply audio.
INPUT_VOICE_FILE_NAME = "./voices/voice_from_gradio.wav"
OUTPUT_VOICE_FILE_NAME_MP3 = "./voices/voice_output.mp3"
OUTPUT_VOICE_FILE_NAME = "./voices/voice_output.wav"
# Base URL of the Taiwanese<->English voice-translation backend service.
BACKEND_URL = os.environ.get("BACKEND_URL")
# =====================================================================
def speech2text(file_name):
	'''
	Transcribe a local audio file to text with OpenAI's Whisper API.

	file_name: path to the audio file (e.g. WAV) to transcribe.
	Returns the transcript text as a string.
	'''
	# Use a context manager so the handle is closed even if the API call
	# raises — the original opened the file and never closed it.
	with open(file_name, "rb") as audio_file:
		transcript = openai.Audio.transcribe("whisper-1", audio_file)
	return transcript['text']

# =====================================================================
# Send the student's message to ChatGPT and render the reply as speech.
def conversation_to_teacher(student_says):
	'''
	student_says: the student's utterance (text) forwarded to ChatGPT.

	Side effects: saves the synthesized reply to OUTPUT_VOICE_FILE_NAME_MP3
	and converts it to WAV at OUTPUT_VOICE_FILE_NAME (gTTS only emits MP3).
	Returns the reply text with leading newlines removed.
	'''
	teacher_says = chat.send_message(student_says)
	tts = gTTS(text=teacher_says, lang='zh-TW')
	tts.save(OUTPUT_VOICE_FILE_NAME_MP3)
	sound = AudioSegment.from_mp3(OUTPUT_VOICE_FILE_NAME_MP3)
	sound.export(OUTPUT_VOICE_FILE_NAME, format="wav")
	# Bug fix: the original looped on `teacher_says[:2] == "\n"`, comparing a
	# two-character slice to a one-character string — never true for a reply
	# starting with "\n\n", so leading newlines were never actually stripped.
	teacher_says = teacher_says.lstrip("\n")
	return teacher_says
# =====================================================================
def submit(voice, language):
	'''
	Gradio click handler: full round-trip of one utterance.

	voice: (sample_rate, numpy array) tuple from the gr.Audio microphone input.
	language: dropdown choice — one of "中文", "English", "臺語".
	Returns [student transcript, teacher reply text, (rate, data) reply audio].
	'''
	rate, scaled = voice
	# Persist the recording so the backend / Whisper can read it from disk.
	write(INPUT_VOICE_FILE_NAME, rate, scaled)
	if (language == "臺語"):
		# Taiwanese input is first converted to English speech by the backend.
		url_tw2en = f'{BACKEND_URL}/api/voice/tw2en'
		# `with` ensures the upload handle is closed (original leaked it).
		with open(INPUT_VOICE_FILE_NAME, "rb") as f_in:
			values = {"file": ("", f_in, "audio/wav")}
			response = requests.post(url_tw2en, files=values)
		with open(INPUT_VOICE_FILE_NAME, 'wb') as f:
			f.write(response.content)
	# Speech -> text.
	student_says = speech2text(INPUT_VOICE_FILE_NAME)
	# Text -> ChatGPT reply (also writes the reply voice files as a side effect).
	teacher_says = conversation_to_teacher(student_says)
	# For Taiwanese, the stored reply voice is English — translate it back.
	if (language == "臺語"):
		url_en2tw = f'{BACKEND_URL}/api/voice/en2tw'
		# `with` ensures the upload handle is closed (original leaked it).
		with open(OUTPUT_VOICE_FILE_NAME, "rb") as audio_file:
			values = {"file": ("", audio_file, "audio/wav")}
			response = requests.post(url_en2tw, files=values)
		with open(OUTPUT_VOICE_FILE_NAME, 'wb') as f:
			f.write(response.content)
	return [student_says, teacher_says, read(OUTPUT_VOICE_FILE_NAME)]

# =====================================================================
with gr.Blocks() as demo:
    # Microphone input plus a few pre-recorded example clips.
    audio_input = gr.Audio(source="microphone")
    gr.Examples(
        [
            ["./voices/我會說中文.mp3"],
            ["./voices/我會說英文.mp3"],
            ["./voices/臺語coffee.wav"],
        ],
        audio_input,
    )
    # Bug fix: the original had a trailing comma after gr.Dropdown(...),
    # which bound `dropdown` to a 1-tuple and forced a `dropdown[0]`
    # workaround in the click() inputs below.
    dropdown = gr.Dropdown(
        ["中文", "English", "臺語"],
        label="Language",
    )
    btn_submit = gr.Button(
        value="Submit",
        variant="primary",
    )

    # =====================================================================
    # Outputs: the transcript, ChatGPT's reply text, and the reply voice.
    text_student = gr.Textbox(label="Content of Your voice")
    text_teacher = gr.Textbox(label="Reply from ChatGPT")
    audio_teacher = gr.Audio(label="ChatGPT voice")
    # =====================================================================
    btn_submit.click(
        submit,
        inputs=[audio_input, dropdown],
        outputs=[text_student, text_teacher, audio_teacher]
    )

demo.launch()