# Hugging Face Spaces page header captured with this file: "Spaces: Sleeping"
"""Language Tutor Application

This script provides a Gradio-based web interface for a language tutoring assistant.
It uses OpenAI's GPT-4 model to generate language-specific responses and Speechify's
text-to-speech service to synthesize audio in multiple languages (Portuguese, French,
Spanish, Korean). The application supports running both locally and in Hugging Face
Spaces environments.
"""
import base64
import os
import uuid

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from speechify import Speechify
# Detect Hugging Face environment: the SYSTEM variable equals "spaces" there.
RUNNING_IN_SPACES = os.getenv("SYSTEM") == "spaces"

# Load API keys.
# Outside Spaces, pull variables from a local .env file first; inside Spaces
# only the process environment is consulted.
if not RUNNING_IN_SPACES:
    load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
speechify_api_key = os.getenv("SPEECHIFY_API_KEY")

# Sanity check: report presence only, never the key values themselves.
print(f"✅ OPENAI_API_KEY loaded: {'✅' if openai_api_key else '❌ MISSING'}")
print(f"✅ SPEECHIFY_API_KEY loaded: {'✅' if speechify_api_key else '❌ MISSING'}")

# Initialize clients (constructed even when a key is missing, as before).
openai_client = OpenAI(api_key=openai_api_key)
speechify_client = Speechify(token=speechify_api_key)
# Voice config.
# Keyed by the language name selected in the UI; chat_and_speak looks the
# entry up with language_config.get(language_choice) and forwards voice_id,
# model and audio_format to the Speechify TTS call.
# NOTE(review): the "language" locale tag is stored here but not read by
# chat_and_speak - confirm whether it should be forwarded to Speechify.
language_config = {
    "Portuguese": {
        "voice_id": "agueda",
        "language": "pt-PT",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "French": {
        "voice_id": "leo",
        "language": "fr-FR",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "Spanish": {
        "voice_id": "danna-sofia",
        "language": "es-MX",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "Korean": {
        "voice_id": "yoon-jung",
        "language": "ko-KR",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
}
| def chat_and_speak(user_input, language_choice, history, show_translation): | |
| # Step 0: Initialize response variables | |
| gpt_response = "" | |
| english_translation = "" | |
| audio_output_path = None | |
| try: | |
| # Step 1: Input validation | |
| if not user_input or not user_input.strip(): | |
| return None, ("", ""), "Please enter some text to process.", history | |
| print(f"π§ User input: {user_input}") | |
| print(f"π£οΈ Language choice: {language_choice}") | |
| # Build messages with history for GPT interaction | |
| system_message = f"You are a friendly {language_choice} language tutor. Respond only in {language_choice}." | |
| messages = [{"role": "system", "content": system_message}] | |
| if history: | |
| for user_msg, assistant_msg in history: | |
| messages.append({"role": "user", "content": user_msg}) | |
| messages.append({"role": "assistant", "content": assistant_msg}) | |
| messages.append({"role": "user", "content": user_input}) | |
| # Step 2: GPT interaction to generate response | |
| completion = openai_client.chat.completions.create( | |
| model="gpt-4", | |
| messages=messages | |
| ) | |
| gpt_response = completion.choices[0].message.content | |
| print(f"π¬ GPT response: {gpt_response}") | |
| # Step 2b: Get English translation | |
| translation_prompt = f"Translate the following text to English:\n\n{gpt_response}" | |
| translation_completion = openai_client.chat.completions.create( | |
| model="gpt-4", | |
| messages=[{"role": "system", "content": "You translate text to English."}, | |
| {"role": "user", "content": translation_prompt}] | |
| ) | |
| english_translation = translation_completion.choices[0].message.content | |
| print(f"π English translation: {english_translation}") | |
| # Step 3: Voice synthesis using Speechify | |
| config = language_config.get(language_choice) | |
| if not config: | |
| error_msg = f"β οΈ Language '{language_choice}' not supported." | |
| print(error_msg) | |
| return None, (gpt_response, english_translation), f"{gpt_response}\n\n{error_msg}", history | |
| tts_response = speechify_client.tts.audio.speech( | |
| input=gpt_response, | |
| voice_id=config["voice_id"], | |
| model=config["model"], | |
| audio_format=config["audio_format"] | |
| ) | |
| if hasattr(tts_response, "audio_data") and isinstance(tts_response.audio_data, str) and tts_response.audio_data: | |
| try: | |
| audio_bytes = base64.b64decode(tts_response.audio_data) | |
| output_dir = "/tmp" if RUNNING_IN_SPACES else "speech_files" | |
| os.makedirs(output_dir, exist_ok=True) | |
| audio_output_path = os.path.join(output_dir, f"speech_{uuid.uuid4().hex}.mp3") | |
| with open(audio_output_path, "wb") as f: | |
| f.write(audio_bytes) | |
| except Exception as audio_err: | |
| print(f"π₯ Error processing audio data: {audio_err}") | |
| return None, (gpt_response, english_translation), f"{gpt_response}\n\nβ οΈ Error saving audio: {audio_err}", history | |
| else: | |
| print("β οΈ No audio data received from Speechify or audio_data is not a string.") | |
| return None, (gpt_response, english_translation), f"{gpt_response}\n\nβ οΈ No audio data received from Speechify.", history | |
| # Append new interaction to history | |
| history = history or [] | |
| history.append((user_input, gpt_response)) | |
| return audio_output_path, (gpt_response, english_translation), history | |
| except Exception as e: | |
| # Step 4: Error handling | |
| print(f"π₯ An unexpected error occurred: {e}") | |
| error_message = f"β οΈ An unexpected error occurred: {e}" | |
| if gpt_response: | |
| return None, (gpt_response, english_translation), f"{gpt_response}\n\n{error_message}", history | |
| return None, ("", ""), error_message, history | |
# Load custom CSS for UI styling.
# The path is relative to the working directory; a missing file still raises
# at import time. Explicit encoding keeps the read independent of the
# platform's default locale.
with open("custom.css", encoding="utf-8") as css_file:
    custom_css = css_file.read()
def update_display_text(chat_output_pair, show_translation):
    """Pick the text to show in the response box.

    Args:
        chat_output_pair: ``(original, translation)`` pair, or a falsy value
            when nothing has been generated yet.
        show_translation: When true, prefer the translation if it is non-empty.

    Returns:
        The translation when it was requested and is available, otherwise
        the original text (an empty string if there is no pair at all).
    """
    original, translated = chat_output_pair or ("", "")
    if show_translation and translated:
        return translated
    return original
# Toggle translation display helper
def toggle_translation(chat_output_pair, show_translation):
    """Return the text to display after the translation checkbox changes.

    Delegates to ``update_display_text`` so the checkbox handler and the
    post-submit step choose the displayed text the same way.
    """
    return update_display_text(chat_output_pair, show_translation)
# Define Gradio UI layout
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        '<div class="custom-bar"><span class="custom-bar-title">Language Tutor</span></div>'
    )

    with gr.Column(elem_classes="main-card"):
        with gr.Row():
            # Left column: learner input and controls
            with gr.Column():
                user_input = gr.Textbox(
                    label="Type in whatever language you prefer",
                    placeholder="Type here...",
                    lines=4,
                )
                language_choice = gr.Dropdown(
                    # Derived from the voice table so every offered language
                    # has a matching TTS configuration.
                    choices=list(language_config),
                    value="Portuguese",
                    label="Language",
                )
                show_translation = gr.Checkbox(label="Show English Translation", value=False)
                submit_btn = gr.Button("Submit")
                chat_history = gr.State([])
                chat_output_pair = gr.State(("", ""))  # (original, translation)

            # Right column: synthesized audio and the text response
            with gr.Column():
                audio_output = gr.Audio(label="Audio Playback", type="filepath", autoplay=True)
                gpt_output = gr.Textbox(label="The Response")

    # The handler must return one value per component listed in `outputs`.
    # The follow-up step then renders the stored pair into the response box
    # according to the checkbox state.
    submit_btn.click(
        fn=chat_and_speak,
        inputs=[user_input, language_choice, chat_history, show_translation],
        outputs=[audio_output, chat_output_pair, chat_history],
    ).then(
        fn=update_display_text,
        inputs=[chat_output_pair, show_translation],
        outputs=gpt_output,
    )

    # Re-render the stored pair when the checkbox is toggled; no new
    # generation is triggered.
    show_translation.change(
        fn=toggle_translation,
        inputs=[chat_output_pair, show_translation],
        outputs=gpt_output,
    )
# Launch the Gradio app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()