# -*- coding: utf-8 -*-
"""Gradio voice-chat front end.

Pipeline: record voice in the browser -> (optional) backend 臺語->English
voice conversion -> OpenAI Whisper transcription -> ChatGPT reply ->
gTTS speech synthesis -> (optional) backend English->臺語 voice conversion
-> play the reply audio back to the user.
"""

import os

import gradio as gr
import openai
import requests
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from scipy.io.wavfile import read, write

from Chat import Chat

API_KEY = os.environ.get("API_KEY")
openai.api_key = API_KEY

r = sr.Recognizer()
chat = Chat(api=API_KEY)

# Working files reused across requests (single-user demo app).
INPUT_VOICE_FILE_NAME = "./voices/voice_from_gradio.wav"
OUTPUT_VOICE_FILE_NAME_MP3 = "./voices/voice_output.mp3"
OUTPUT_VOICE_FILE_NAME = "./voices/voice_output.wav"
BACKEND_URL = os.environ.get("BACKEND_URL")


# =====================================================================
def speech2text(file_name):
    """Transcribe the audio file at *file_name* with OpenAI Whisper.

    Parameters
    ----------
    file_name : str
        Path to a WAV/MP3 file readable by the Whisper API.

    Returns
    -------
    str
        The transcribed text.
    """
    # `with` guarantees the handle is closed (the original leaked it).
    with open(file_name, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


# =====================================================================
# Send the message to ChatGPT and render the reply as speech.
def conversation_to_teacher(student_says):
    """Get a ChatGPT reply for *student_says* and synthesize it to disk.

    Side effects: writes OUTPUT_VOICE_FILE_NAME_MP3 (gTTS output) and
    OUTPUT_VOICE_FILE_NAME (WAV conversion via pydub).

    Returns
    -------
    str
        The reply text with leading newlines removed.
    """
    teacher_says = chat.send_message(student_says)
    tts = gTTS(text=teacher_says, lang='zh-TW')
    tts.save(OUTPUT_VOICE_FILE_NAME_MP3)
    sound = AudioSegment.from_mp3(OUTPUT_VOICE_FILE_NAME_MP3)
    sound.export(OUTPUT_VOICE_FILE_NAME, format="wav")
    # BUG FIX: the original compared the 2-char slice teacher_says[:2]
    # to the 1-char string "\n", which (almost) never matches, so the
    # intended strip of leading blank lines never happened.
    return teacher_says.lstrip("\n")


# =====================================================================
def submit(voice, language):
    """Handle one round trip: recorded voice in, text + reply voice out.

    Parameters
    ----------
    voice : tuple(int, numpy.ndarray)
        (sample_rate, samples) pair from the gr.Audio microphone input.
    language : str
        One of "中文", "English", "臺語" from the Dropdown.

    Returns
    -------
    list
        [student text, teacher text, (rate, samples) of the reply WAV].
    """
    rate, scaled = voice
    write(INPUT_VOICE_FILE_NAME, rate, scaled)  # persist mic samples as WAV

    # Taiwanese speech must first be converted to English speech by the
    # backend service before Whisper transcribes it.
    if language == "臺語":
        url_tw2en = f'{BACKEND_URL}/api/voice/tw2en'  # Taiwanese -> English
        with open(INPUT_VOICE_FILE_NAME, "rb") as audio_file:
            values = {"file": ("", audio_file, "audio/wav")}
            response = requests.post(url_tw2en, files=values)
        with open(INPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)

    # Transcribe the (possibly converted) recording.
    student_says = speech2text(INPUT_VOICE_FILE_NAME)
    # Forward the text to ChatGPT; reply speech lands in OUTPUT_VOICE_FILE_NAME.
    teacher_says = conversation_to_teacher(student_says)

    # For Taiwanese the synthesized reply is English speech; convert it
    # back to Taiwanese via the backend.
    if language == "臺語":
        url_en2tw = f'{BACKEND_URL}/api/voice/en2tw'  # English -> Taiwanese
        with open(OUTPUT_VOICE_FILE_NAME, "rb") as audio_file:
            values = {"file": ("", audio_file, "audio/wav")}
            response = requests.post(url_en2tw, files=values)
        with open(OUTPUT_VOICE_FILE_NAME, 'wb') as f:
            f.write(response.content)

    return [student_says, teacher_says, read(OUTPUT_VOICE_FILE_NAME)]


# =====================================================================
with gr.Blocks() as demo:
    audio_input = gr.Audio(source="microphone")
    gr.Examples(
        [
            ["./voices/我會說中文.mp3"],
            ["./voices/我會說英文.mp3"],
            ["./voices/臺語coffee.wav"],
        ],
        audio_input,
    )
    # BUG FIX: the original had a stray trailing comma after
    # gr.Dropdown(...), which wrapped the component in a 1-tuple and
    # forced `dropdown[0]` in the click() wiring below.
    dropdown = gr.Dropdown(
        ["中文", "English", "臺語"],
        label="Language",
    )
    btn_submit = gr.Button(
        value="Submit",
        variant="primary",
    )
    # =====================================================================
    text_student = gr.Textbox(label="Content of Your voice")
    text_teacher = gr.Textbox(label="Reply from ChatGPT")
    audio_teacher = gr.Audio(label="ChatGPT voice")
    # =====================================================================
    btn_submit.click(
        submit,
        inputs=[audio_input, dropdown],
        outputs=[text_student, text_teacher, audio_teacher],
    )

demo.launch()