# ChatLingo / app.py  (commit a144be7)
# Speech-to-text and text-to-speech chat app for practicing a foreign
# language with an AI persona.
from openai import OpenAI
from dotenv import load_dotenv
import MAIAI
# from deep_translator import GoogleTranslator
# import speech_recognition as sr
# import assemblyai as aai
# import pyttsx3
from gtts import gTTS
import gradio as gr
from gradio.themes.base import Base
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()
# Maps human-readable language names (shown in the UI dropdowns) to the
# IETF/Google language tags expected by gTTS and translation backends.
# Several keys are deliberate aliases for the same tag (e.g. "中文",
# "简体中文", "Chinese", "Chinese (Simplified)" all map to "zh-CN") so
# free-typed dropdown values still resolve.
language_map = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Basque": "eu",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "中文": "zh-CN",
    "繁体中文": "zh-TW",
    "简体中文": "zh-CN",
    "Chinese": "zh-CN",
    "Chinese (Simplified)": "zh-CN",
    "Chinese (Traditional)": "zh-TW",
    "Corsican": "co",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Filipino": "fil",
    "Finnish": "fi",
    "French": "fr",
    "Frisian": "fy",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "he",
    "Hindi": "hi",
    "Hmong": "hmn",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Igbo": "ig",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kinyarwanda": "rw",
    "Korean": "ko",
    "Kurdish": "ku",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Maori": "mi",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nepali": "ne",
    "Norwegian": "no",
    "Nyanja (Chichewa)": "ny",
    "Odia (Oriya)": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Samoan": "sm",
    "Scots Gaelic": "gd",
    "Serbian": "sr",
    "Sesotho": "st",
    "Shona": "sn",
    "Sindhi": "sd",
    "Sinhala (Sinhalese)": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu"
}
# def pytts(input_text):
# if input_text:
# engine = pyttsx3.init()
# # # Get available voices and print them out
# # voices = engine.getProperty('voices')
# # for index, voice in enumerate(voices):
# # print(f"Voice {index}: {voice.id} - {voice.languages} - {voice.gender} - {voice.name}")
# # # Set voice (change index based on what is available on your system)
# # engine.setProperty('voice', voices[1].id) # Change the index to switch voices
# # # Set speech rate
# # rate = engine.getProperty('rate')
# # engine.setProperty('rate', rate - 50) # Decrease rate; increase to make it faster
# # # Set volume
# # volume = engine.getProperty('volume')
# # engine.setProperty('volume', volume + 0.25) # Increase volume; decrease to lower the volume
# # Speak text
# engine.say(input_text)
# engine.runAndWait()
def gtts(input_text, language='English'):
    """Synthesize speech for *input_text* with Google TTS (gTTS).

    Parameters
    ----------
    input_text : str
        Text to speak. If empty/falsy nothing is synthesized and ``None``
        is returned (the Gradio Audio output then simply stays empty).
    language : str
        Human-readable language name, mapped to an IETF tag via
        ``language_map``; unknown names fall back to English ('en').

    Returns
    -------
    str | None
        Path to the generated MP3 file, or ``None`` when there is no text.
    """
    import tempfile

    if not input_text:
        return None
    # Map the user-friendly language name to the IETF tag gTTS expects.
    lang = language_map.get(language, 'en')
    tts = gTTS(text=input_text, lang=lang, slow=False)
    # Write to a unique temp file instead of a shared hard-coded
    # "output.mp3": with concurrent Gradio sessions the fixed name let one
    # user's audio clobber (or be served to) another's. delete=False keeps
    # the file alive for Gradio to stream back.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        audio_file = f.name
    tts.save(audio_file)
    return audio_file
# def assembly_speech_to_text(audio_file_path):
#     aai.settings.api_key = "<ASSEMBLYAI_API_KEY>"  # NOTE(review): a real key was committed here; revoke it and load from the environment instead
# transcriber = aai.Transcriber()
# transcript = transcriber.transcribe(audio_file_path)
# return transcript.text
# def google_speech_to_text(audio_file_path):
# if audio_file_path:
# recognizer = sr.Recognizer()
# with sr.AudioFile(audio_file_path) as source:
# audio_data = recognizer.record(source)
# try:
# text = recognizer.recognize_google(audio_data)
# return text
# except sr.UnknownValueError:
# return "Google Speech Recognition could not understand audio"
# except sr.RequestError as e:
# return f"Could not request results from Google Speech Recognition service; {e}"
def openai_speech_to_text(audio_file_path):
    """Transcribe an audio file to text with OpenAI Whisper.

    Parameters
    ----------
    audio_file_path : str | None
        Path to an audio file (e.g. from the Gradio microphone widget).
        If falsy, nothing is transcribed and ``None`` is returned.

    Returns
    -------
    str | None
        The transcript (``response_format="text"`` makes the API return a
        plain string), or ``None`` when no path was given.
    """
    if not audio_file_path:
        return None
    # OpenAI() reads OPENAI_API_KEY from the environment (see load_dotenv()).
    client = OpenAI()
    # Context manager closes the handle even if the API call raises;
    # the original opened the file and never closed it.
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            response_format="text",
            file=audio_file,
        )
    return transcription
def chat(text, history, native_language, language, persona, tone = "Casual", model = "gpt-4o-mini"):
    """One chat turn: grade the learner's text, reply in character, translate.

    Three MAIAI agents run per turn: a *teacher* that gives feedback in the
    learner's native language, a *responder* that answers in-character in the
    target language, and a *translator* that renders that answer back into
    the native language. (MAIAI's internals are not visible here — presumably
    each Task.execute() performs one LLM call; confirm against the MAIAI
    package.)

    Parameters:
        text: the learner's message (possibly a speech transcript).
        history: Gradio chatbot history, a list of (user, bot) tuples;
            mutated in place by appending this turn.
        native_language / language: human-readable names, e.g. "English".
        persona: character the responder role-plays.
        tone: "Casual" enables a slang-friendly prompt addendum.
        model: model name forwarded to MAIAI.Agent.

    Returns:
        ("", history, response) — empty string clears the input textbox,
        history updates the chatbot, and the raw target-language response
        feeds the hidden textbox used by the read-aloud button.
    """
    # Debug trace of the active settings.
    print(tone, native_language, language, persona)
    # if audio != None:
    #     text = speechtotext(audio)
    # Extra prompt clause injected into both teacher and responder prompts
    # when the casual tone is selected.
    casual = "This is in a casual, internet texting context, use of local slangs is encouraged." if tone == "Casual" else ""
    teacher = MAIAI.Agent(model=model, temperature=0.5, role=f"You are a {language} teacher teaching {native_language} speaking student.")
    responder = MAIAI.Agent(model=model, temperature=0.5, role=f"""You are {language} speaking {persona}. Respond to the user's text in {language}. Refer to Chat History for context. Keep the conversation going. {casual}""")
    translator = MAIAI.Agent(model=model, temperature=0.5, role=f"You are a language translator")
    # Grade/correct the learner's text; feedback is written in the
    # learner's native language.
    feedback_task = MAIAI.Task(
        agent=teacher,
        goal=f"""Text: {text}
Point out and translate any non-{language} from the text into {language}.
Correct any linguistic error in the text and give example driven feedback on how to improve the text.
You MUST give your feedback in {native_language}.
{casual}
"""
    )
    # In-character reply in the target language, conditioned on history.
    respond_task = MAIAI.Task(
        agent=responder,
        goal=f"""{text}
Respond to the text above in {language}.
Refer to Chat History for context.
Chat History: {history}"""
    )
    feedback = feedback_task.execute()
    response = respond_task.execute()
    # Translate the reply back so the learner can check their understanding.
    translate_task = MAIAI.Task(
        agent=translator,
        goal=f"translate {response} from {language} to {native_language}"
    )
    translation = translate_task.execute()
    # # Get IETF tags for target and native languages
    # native_lang = language_map.get(native_language, 'en')
    # # Translate the response to the target language
    # translated_response = GoogleTranslator(source='auto', target=native_lang).translate(response)
    # Markdown-formatted bot bubble: feedback block, then the persona's
    # reply with its translation in parentheses.
    output = f"""
***Feedback:***
{feedback}
-----------
***{persona}:***
{response}
({translation})
"""
    history.append((text, output))
    return "", history, response
# Sample Function Call ------------------------------------
# feedback,response = chat("Soy jugando Demonslayer! Y tu?", "English","Spanish","friendly lady",,casual_tone="Casual")
# print(f"""
# Feedback: {feedback}
# Reply: {response}
# """)
# Gradio Custom Chatbot -------------------------------------------------------
# Gradio Custom Chatbot -------------------------------------------------------
with gr.Blocks(fill_height=True, theme=Base()) as demo:
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        scale=1,
    )
    with gr.Row():
        # Microphone/upload input; its transcript is written into chat_input
        # (wired below) so the user can review or edit before submitting.
        audio_input = gr.Audio(sources="microphone", type="filepath",
                               label="Speak or upload audio", scale=2)
        chat_input = gr.Textbox(interactive=True, scale=6)
        submit_button = gr.Button("Submit", scale=1)
    with gr.Row():
        # Hidden holder for the assistant's raw reply so it can be voiced.
        response = gr.Textbox(visible=False, label="Read out Chat Response")
        output_audio = gr.Audio(label="Reply Audio", type="filepath", scale=9)
        read_out_loud = gr.Button("Read Reply", scale=1)
    with gr.Accordion(label="Settings"):
        native_language = gr.components.Dropdown(choices=["English", "中文", "Spanish"], value="English", allow_custom_value=True, label="I speak")
        language = gr.components.Dropdown(choices=["English", "中文", "Spanish"], value="English", allow_custom_value=True, label="I want to learn")
        persona = gr.components.Textbox(value="LinguAI Chatbot", label="I want to talk to")
        tone = gr.components.Dropdown(choices=["Casual", "Formal"], value="Casual", label="Tone")
    # Explicit event wiring replaces the original reactive construction
    # (gr.Textbox(value=openai_speech_to_text, inputs=[gr.Audio(...)])),
    # which also invoked the transcriber once at app load with a None path.
    # Behavior is the same: new audio -> transcript appears in the textbox.
    audio_input.change(openai_speech_to_text, audio_input, chat_input)
    chat_input.submit(chat, [chat_input, chatbot, native_language, language, persona, tone], [chat_input, chatbot, response])
    submit_button.click(chat, [chat_input, chatbot, native_language, language, persona, tone], [chat_input, chatbot, response])
    read_out_loud.click(gtts, [response, language], output_audio)
demo.launch()