# Hugging Face Space: MikeMai / ChatLingo
# Space status when captured: Runtime error
# File size: 9,400 bytes

from openai import OpenAI
from dotenv import load_dotenv
import MAIAI

# from deep_translator import GoogleTranslator

# import speech_recognition as sr
# import assemblyai as aai

# import pyttsx3
from gtts import gTTS

import gradio as gr
from gradio.themes.base import Base

# Load environment variables
load_dotenv()

# Maps a human-readable language name (as chosen or typed in the UI) to the
# language code that is handed to gTTS as `lang`. Several Chinese spellings
# alias the same code, so lookups by any of those names resolve identically.
# NOTE(review): the codes look like Google Translate codes; not every one is
# necessarily accepted by gTTS (e.g. Hebrew / Filipino may need "iw" / "tl").
# Verify against `gtts.lang.tts_langs()` before relying on an entry.
language_map = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Basque": "eu",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "中文" :"zh-CN",
    "繁体中文": "zh-TW",
    "简体中文": "zh-CN",
    "Chinese" : "zh-CN",
    "Chinese (Simplified)": "zh-CN",
    "Chinese (Traditional)": "zh-TW",
    "Corsican": "co",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Filipino": "fil",
    "Finnish": "fi",
    "French": "fr",
    "Frisian": "fy",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "he",
    "Hindi": "hi",
    "Hmong": "hmn",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Igbo": "ig",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kinyarwanda": "rw",
    "Korean": "ko",
    "Kurdish": "ku",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Maori": "mi",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nepali": "ne",
    "Norwegian": "no",
    "Nyanja (Chichewa)": "ny",
    "Odia (Oriya)": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Samoan": "sm",
    "Scots Gaelic": "gd",
    "Serbian": "sr",
    "Sesotho": "st",
    "Shona": "sn",
    "Sindhi": "sd",
    "Sinhala (Sinhalese)": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu"
}

# def pytts(input_text):
#     if input_text:
#         engine = pyttsx3.init()

#         # # Get available voices and print them out
#         # voices = engine.getProperty('voices')
#         # for index, voice in enumerate(voices):
#         #     print(f"Voice {index}: {voice.id} - {voice.languages} - {voice.gender} - {voice.name}")

#         # # Set voice (change index based on what is available on your system)
#         # engine.setProperty('voice', voices[1].id)  # Change the index to switch voices

#         # # Set speech rate
#         # rate = engine.getProperty('rate')
#         # engine.setProperty('rate', rate - 50)  # Decrease rate; increase to make it faster

#         # # Set volume
#         # volume = engine.getProperty('volume')
#         # engine.setProperty('volume', volume + 0.25)  # Increase volume; decrease to lower the volume

#         # Speak text
#         engine.say(input_text)
#         engine.runAndWait()

def gtts(input_text, language='English'):
    """Synthesize ``input_text`` to an MP3 file with gTTS and return its path.

    Args:
        input_text: Text to read aloud. ``None``, empty or whitespace-only
            text produces no audio.
        language: Human-readable language name, looked up in
            ``language_map``. Names missing from the map fall back to
            English (``'en'``).

    Returns:
        Path of the generated MP3 file, or ``None`` when there is nothing
        to speak.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Nothing to synthesize: skip the network call entirely.
    if not input_text or not input_text.strip():
        return None

    # Map the user-friendly language name to the code gTTS expects.
    lang = language_map.get(language, 'en')  # Default to 'en' if language not found
    tts = gTTS(text=input_text, lang=lang, slow=False)

    # Use a unique file per call: a fixed "output.mp3" is shared by every
    # request, so concurrent users would overwrite each other's audio.
    # The file is intentionally not deleted here because the caller needs
    # the path to stay valid after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    tts.save(audio_file)
    return audio_file

# def assembly_speech_to_text(audio_file_path):
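#     aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")  # load from the environment; never commit API keys (revoke the key that was previously hard-coded here)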
#     transcriber = aai.Transcriber()
#     transcript = transcriber.transcribe(audio_file_path)
#     return transcript.text

# def google_speech_to_text(audio_file_path):
#     if audio_file_path:
#         recognizer = sr.Recognizer()
#         with sr.AudioFile(audio_file_path) as source:
#             audio_data = recognizer.record(source)
#             try:
#                 text = recognizer.recognize_google(audio_data)
#                 return text
#             except sr.UnknownValueError:
#                 return "Google Speech Recognition could not understand audio"
#             except sr.RequestError as e:
#                 return f"Could not request results from Google Speech Recognition service; {e}"


def openai_speech_to_text(audio_file_path):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        audio_file_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) means there is no recording yet.

    Returns:
        The transcription returned by the API (requested with
        ``response_format="text"``), or ``None`` when no path was given.
    """
    if not audio_file_path:
        return None

    # NOTE(review): presumably the client picks up its API key from the
    # environment populated by load_dotenv() — confirm deployment config.
    client = OpenAI()

    # The context manager guarantees the handle is closed even if the API
    # call raises; the original left the file open after every request.
    with open(audio_file_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            response_format="text",
            file=audio_file,
        )

def chat(text, history, native_language, language, persona, tone = "Casual", model = "gpt-4o-mini"):
    """Run one chat turn: give feedback on ``text``, reply to it, translate the reply.

    Three MAIAI agents are used: a teacher (feedback written in
    ``native_language``), a responder (replies in ``language`` as
    ``persona``) and a translator (reply translated back to
    ``native_language``).

    Args:
        text: The learner's message.
        history: List of ``(user_text, bot_output)`` pairs; it is appended to
            in place. ``None`` is treated as an empty history.
        native_language: Language the learner speaks.
        language: Language the learner is practising.
        persona: Character the responder plays.
        tone: ``"Casual"`` adds a slang-friendly instruction to the prompts;
            any other value adds nothing.
        model: Model name passed to every agent.

    Returns:
        A tuple ``("", history, response)``: an empty string (used to clear
        the input box), the updated history, and the raw reply text. For
        blank input the history is returned unchanged and the reply is ``""``.
    """
    history = [] if history is None else history

    # Blank submissions would otherwise cost three model calls and add an
    # empty turn to the conversation.
    if not text or not text.strip():
        return "", history, ""

    print(tone, native_language, language, persona)

    casual = "This is in a casual, internet texting context, use of local slangs is encouraged." if tone == "Casual" else ""

    teacher = MAIAI.Agent(model=model, temperature=0.5, role=f"You are a {language} teacher teaching {native_language} speaking student.")
    responder = MAIAI.Agent(model=model, temperature=0.5, role=f"""You are {language} speaking {persona}. Respond to the user's text in {language}. Refer to Chat History for context. Keep the conversation going. {casual}""")
    translator = MAIAI.Agent(model=model, temperature=0.5, role="You are a language translator")

    feedback_task = MAIAI.Task(
        agent=teacher,
        goal=f"""Text: {text}

Point out and translate any non-{language} from the text into {language}.
Correct any linguistic error in the text and give example driven feedback on how to improve the text.
You MUST give your feedback in {native_language}.
{casual}
"""
    )

    respond_task = MAIAI.Task(
        agent=responder,
        goal=f"""{text}
        
        Respond to the text above in {language}.
        Refer to Chat History for context.
        Chat History: {history}"""
    )

    feedback = feedback_task.execute()

    response = respond_task.execute()

    # The translation task can only be built once the reply exists.
    translate_task = MAIAI.Task(
        agent=translator,
        goal=f"translate {response} from {language} to {native_language}"
    )

    translation = translate_task.execute()

    # Markdown shown in the chatbot: feedback first, then the persona's
    # reply with its translation in parentheses.
    output = f"""
***Feedback:***

{feedback}

-----------

***{persona}:***

{response}
({translation})
    """

    history.append((text, output))

    return "", history, response

# Sample Function Call ------------------------------------
    
# feedback,response = chat("Soy jugando Demonslayer! Y tu?", "English","Spanish","friendly lady",,casual_tone="Casual")

# print(f"""
# Feedback: {feedback}

# Reply: {response}
#       """)

# Gradio Custom Chatbot -------------------------------------------------------

# Gradio UI: a chatbot pane, a text/voice input row, a reply-audio row and a
# settings accordion, wired to chat() and gtts() at the bottom of the block.
with gr.Blocks(fill_height=True, theme=Base()) as demo:
    
    # Conversation display; chat() appends (user text, formatted output) pairs.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        scale=1,
    )
    
    with gr.Row():
        
        # Text input whose value is computed by openai_speech_to_text from the
        # Audio component given in `inputs`, so a recording is transcribed
        # into the box.
        # NOTE(review): relies on Gradio's callable-`value` + `inputs`
        # behavior — confirm it for the installed Gradio version.
        chat_input = gr.Textbox(interactive=True, scale=8,
                            inputs=[gr.components.Audio(sources="microphone", type="filepath", label="Speak or upload audio")],
                            value=openai_speech_to_text)
        submit_button = gr.Button("Submit", scale=1)
    
    with gr.Row():    
        # Hidden textbox holding the raw reply returned by chat(); it is the
        # text that "Read Reply" sends to gtts().
        response = gr.Textbox(visible=False, label="Read out Chat Response")
        output_audio = gr.Audio(label="Reply Audio", type="filepath", scale = 9)
        read_out_loud = gr.Button("Read Reply", scale = 1)
    
    # Conversation settings passed to chat() on every turn. Both language
    # dropdowns accept custom values in addition to the listed choices.
    with gr.Accordion(label = "Settings"):
            native_language = gr.components.Dropdown(choices=["English","中文","Spanish"], value="English", allow_custom_value=True, label="I speak")
            language = gr.components.Dropdown(choices=["English","中文","Spanish"], value="English", allow_custom_value=True, label="I want to learn")
            persona = gr.components.Textbox(value = "LinguAI Chatbot", label="I want to talk to")
            tone = gr.components.Dropdown(choices=["Casual","Formal"], value="Casual", label="Tone")
            
    # Pressing Enter in the textbox and clicking "Submit" run the same chat
    # turn. chat() returns ("", history, response), which maps onto the three
    # outputs: clear the input, refresh the chatbot, store the raw reply.
    chat_input.submit(chat, [chat_input, chatbot, native_language, language, persona, tone], [chat_input, chatbot, response])
    submit_button.click(chat, [chat_input, chatbot, native_language, language, persona, tone], [chat_input, chatbot, response])
    # Read the stored reply aloud in the language being learned.
    read_out_loud.click(gtts,[response,language],output_audio)

# Runs unconditionally at module level (there is no __main__ guard).
demo.launch()