# Source: Hugging Face Space upload by BettyHsu (commit 2376c39).
# -*- coding: utf-8 -*-
from Chat import Chat
import gradio as gr
import speech_recognition as sr
from scipy.io.wavfile import write, read
from gtts import gTTS
from pydub import AudioSegment
import openai
import requests
import os
# OpenAI API key (used for Whisper transcription and the chat), read from the env.
API_KEY = os.environ.get("API_KEY")
openai.api_key = API_KEY
# NOTE(review): this Recognizer is never used below — candidate for removal.
r = sr.Recognizer()
# Project-local chat wrapper (presumably around the OpenAI chat API — see Chat.py).
chat = Chat(api=API_KEY)
# Scratch file paths: the recorded input, and the synthesized reply (MP3 + WAV).
INPUT_VOICE_FILE_NAME = "./voices/voice_from_gradio.wav"
OUTPUT_VOICE_FILE_NAME_MP3 = "./voices/voice_output.mp3"
OUTPUT_VOICE_FILE_NAME = "./voices/voice_output.wav"
# Base URL of the backend service that converts Taiwanese <-> English speech.
BACKEND_URL = os.environ.get("BACKEND_URL")
# =====================================================================
def speech2text(file_name):
    '''
    Transcribe the audio file at *file_name* using OpenAI Whisper.

    file_name: path to an audio file accepted by the Whisper API.
    Returns the transcribed text as a string.
    '''
    # Context manager so the file handle is closed even if the API call raises
    # (the original left the file open forever).
    with open(file_name, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']
# =====================================================================
# Send the message to ChatGPT and synthesize the returned text as speech.
def conversation_to_teacher(student_says):
    '''
    Send the student's message to ChatGPT, synthesize the reply to speech,
    and return the reply text.

    student_says: transcribed text of the student's voice input.
    Returns the reply text with leading newlines removed.
    Side effects: writes the synthesized reply to OUTPUT_VOICE_FILE_NAME_MP3
    and (converted to WAV) OUTPUT_VOICE_FILE_NAME.
    '''
    teacher_says = chat.send_message(student_says)
    # Bug fix: the original tested `teacher_says[:2] == "\n"`, comparing a
    # two-character slice against a single newline — it never matched replies
    # that start with "\n\n". Strip all leading newlines, and do it BEFORE
    # synthesis so gTTS does not receive them either.
    teacher_says = teacher_says.lstrip("\n")
    tts = gTTS(text=teacher_says, lang='zh-TW')
    tts.save(OUTPUT_VOICE_FILE_NAME_MP3)
    # gTTS only produces MP3; convert to WAV for the downstream consumers.
    sound = AudioSegment.from_mp3(OUTPUT_VOICE_FILE_NAME_MP3)
    sound.export(OUTPUT_VOICE_FILE_NAME, format="wav")
    return teacher_says
# =====================================================================
def submit(voice, language):
    '''
    Handle one round of conversation from the Gradio UI.

    voice: (sample_rate, audio_array) tuple from the gr.Audio microphone input.
    language: "中文", "English" or "臺語" from the Dropdown.
    Returns [student's transcribed text, teacher's reply text,
             (rate, data) tuple of the reply audio].
    '''
    rate, scaled = voice
    # Persist the recorded array as a WAV file for the downstream services.
    write(INPUT_VOICE_FILE_NAME, rate, scaled)
    if language == "臺語":
        # Taiwanese speech -> English speech via the backend service.
        url_tw2en = f'{BACKEND_URL}/api/voice/tw2en'
        # `with` closes the handle before we rewrite the same path below
        # (the original leaked it, and held it open during the rewrite).
        with open(INPUT_VOICE_FILE_NAME, "rb") as audio_in:
            values = {"file": ("", audio_in, "audio/wav")}
            response = requests.post(url_tw2en, files=values)
        with open(INPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)
    # Speech -> text.
    student_says = speech2text(INPUT_VOICE_FILE_NAME)
    # Text -> ChatGPT reply (also synthesizes the reply audio files).
    teacher_says = conversation_to_teacher(student_says)
    # For Taiwanese the synthesized reply is English speech; convert it back.
    if language == "臺語":
        url_en2tw = f'{BACKEND_URL}/api/voice/en2tw'
        with open(OUTPUT_VOICE_FILE_NAME, "rb") as audio_out:
            values = {"file": ("", audio_out, "audio/wav")}
            response = requests.post(url_en2tw, files=values)
        with open(OUTPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)
    return [student_says, teacher_says, read(OUTPUT_VOICE_FILE_NAME)]
# =====================================================================
# Gradio UI: record voice -> pick language -> submit -> transcript, reply, audio.
with gr.Blocks() as demo:
    audio_input = gr.Audio(source="microphone")
    gr.Examples(
        [
            ["./voices/我會說中文.mp3"],
            ["./voices/我會說英文.mp3"],
            ["./voices/臺語coffee.wav"],
        ],
        audio_input,
    )
    # Bug fix: the original had a trailing comma after gr.Dropdown(...), which
    # made `dropdown` a 1-tuple and forced `dropdown[0]` in the click() wiring.
    dropdown = gr.Dropdown(
        ["中文", "English", "臺語"],
        label="Language",
    )
    btn_submit = gr.Button(
        value="Submit",
        variant="primary",
    )
    # Output components.
    text_student = gr.Textbox(label="Content of Your voice")
    text_teacher = gr.Textbox(label="Reply from ChatGPT")
    audio_teacher = gr.Audio(label="ChatGPT voice")
    btn_submit.click(
        submit,
        inputs=[audio_input, dropdown],
        outputs=[text_student, text_teacher, audio_teacher]
    )
demo.launch()