# Spaces: Runtime error
import json
import os
import random
import re
import tempfile

import gradio as gr
import requests
import speech_recognition as sr
from dotenv import load_dotenv
from gradio_client import Client
from pydub import AudioSegment
# Pull settings from a local .env file (if present) into the environment.
load_dotenv()

API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

# Fail fast and name the variables that are absent or empty, so a broken
# deployment can be fixed without guessing which secret is missing.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("DEEPSEEK_API_KEY", API_KEY),
        ("HUGGINGFACE_TOKEN", HF_TOKEN),
        ("TTS_PASSWORD", TTS_PASSWORD),
    )
    if not env_value
]
if _missing_env:
    raise ValueError(
        "Missing required environment variables! "
        f"({', '.join(_missing_env)})"
    )

# Remote text-to-speech Space, authenticated with the Hugging Face token.
TTS_CLIENT = Client(
    "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited",
    hf_token=HF_TOKEN,
)

# Shared speech recognizer used by convert_audio_to_text().
recognizer = sr.Recognizer()
# System message that opens every tutoring conversation: it defines the "Sam"
# persona and the JSON keys the model must return on each turn.
MAIN_SYSTEM_PROMPT = dict(
    role="system",
    content="""You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students. Your responses must be in JSON format with these keys:
'response': Your main response (keep it conversational and engaging),
'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
'level_assessment': Current assessment (beginner/intermediate/advanced),
'encouragement': A motivating comment,
'context_memory': Important details about the user,
'next_question': A follow-up question to keep conversation flowing
IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.
Your personality:
- Be the conversation driver - ask follow-up questions
- Show genuine interest in the student's life
- Provide corrections naturally without stopping the flow
- Use the student's name frequently
- Build on previous topics
- Be encouraging but provide constructive feedback
- Ask about their day, work, hobbies, culture, goals
Correction guidelines:
- ALWAYS provide corrections field - even if it's positive feedback
- ALWAYS provide vocabulary field - suggest alternatives or related words
- Use format: "Instead of 'X', try saying 'Y'"
- Give pronunciation tips when needed
- If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"
Vocabulary guidelines:
- ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
- Provide explanations for suggested words
- Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
- Suggest topic-related vocabulary
Conversation flow:
- Start with personal questions (name, country, job, hobbies)
- Build conversations around their interests
- Use profession-specific vocabulary
- Ask about their culture and experiences
- Keep the conversation natural and flowing
- Always end with a question to continue the dialogue
Response length: Keep responses conversational (2-3 sentences max for response field).""",
)
# One-off system message used to generate the opening greeting; the model is
# asked to reply with a JSON object holding a single 'greeting' key.
# The apostrophes and dashes below were mis-encoded in the source and have
# been restored so the model receives clean text.
WELCOME_PROMPT = {
    "role": "system",
    "content": """Create a heartfelt welcome message that:
1. Introduces you as Sam, an enthusiastic and friendly English tutor who’s excited to guide them
2. Kindly asks for their name and where they’re from in a natural conversational way
3. Expresses genuine excitement about helping them grow
Return the message in JSON format with the key 'greeting'.
Make it feel personal, warm, and inviting — like a tutor who truly cares. Keep it within 2 sentences.
Example:
{"greeting": "Hi there! I'm Sam, your friendly English tutor — so glad you're here! What's your name and where are you from?"}
""",
}
class EnglishTutor:
    """Conversation state and DeepSeek chat calls for the tutor persona "Sam".

    Attributes:
        chat_history: Messages sent to the chat-completions endpoint; it
            always starts with ``MAIN_SYSTEM_PROMPT``.
        user_info: Facts remembered about the learner (name, level,
            interests, country, profession, goals).
    """

    API_URL = "https://api.deepseek.com/v1/chat/completions"
    MODEL = "deepseek-chat"
    # Seconds to wait for the API. Without a timeout a stalled connection
    # would block the request (and the UI callback) indefinitely.
    REQUEST_TIMEOUT = 60

    # "Name: Ann", "name Ann" and "name is Ann", in any letter case. The
    # lookahead stops the verb "is" from being captured as the name.
    _NAME_RE = re.compile(
        r"\bname\b(?:\s+is\b)?[:\s]+(?!is\b)([A-Za-z]+)", re.IGNORECASE
    )
    # "from Egypt" / "Country: Saudi Arabia". Only letters and plain spaces
    # are captured, so the value cannot run on into the next line.
    _COUNTRY_RE = re.compile(
        r"\b(?:from|country)\b(?:\s+is\b)?[:\s]+([A-Za-z][A-Za-z ]*)",
        re.IGNORECASE,
    )
    # Emoji and pictographs, plus the variation selector and zero-width
    # joiner that accompany them. The character list in the source was
    # mis-encoded, so general Unicode ranges are used instead.
    _EMOJI_RE = re.compile(
        "[\U0001F000-\U0001FAFF\u2600-\u27BF\uFE0F\u200D]"
    )

    def __init__(self):
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None,
        }

    def _request_json(self, messages, temperature):
        """POST ``messages`` to the chat endpoint and return the parsed JSON reply.

        Raises whatever ``requests`` or ``json`` raise on transport errors,
        non-2xx statuses or malformed payloads; callers handle the fallback.
        """
        response = requests.post(
            self.API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": self.MODEL,
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"},
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface HTTP errors directly instead of a confusing KeyError below.
        response.raise_for_status()
        return json.loads(response.json()["choices"][0]["message"]["content"])

    def get_welcome_message(self):
        """Return a generated greeting, or a fixed greeting if the call fails."""
        try:
            welcome_json = self._request_json(
                [WELCOME_PROMPT], random.uniform(0.5, 1.0)
            )
            return welcome_json["greeting"]
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return "Hi! I'm Sam, your English tutor. What's your name and where are you from?"

    def get_bot_response(self, user_message):
        """Send one learner turn to the model and return its reply as a dict.

        On success the turn and the reply are appended to ``chat_history``
        and ``user_info`` is refreshed from the reply. On any failure the
        history is restored to its previous length and a canned apology
        dict with the same keys is returned.
        """
        history_length = len(self.chat_history)
        try:
            # Add user context to the message
            context_info = f"User info: {self.user_info}"
            enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
            self.chat_history.append({"role": "user", "content": enhanced_message})

            bot_response = self._request_json(
                self.chat_history, random.uniform(0.8, 1.0)
            )
            if not isinstance(bot_response, dict):
                raise ValueError("model reply is not a JSON object")

            # Update user info
            if "level_assessment" in bot_response:
                self.user_info["level"] = bot_response["level_assessment"]
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])

            self.chat_history.append(
                {"role": "assistant", "content": json.dumps(bot_response)}
            )
            return bot_response
        except Exception as e:
            # Drop the unanswered user turn so later requests do not carry
            # a dangling message with no assistant reply.
            del self.chat_history[history_length:]
            print(f"Error getting bot response: {str(e)}")
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?",
            }

    def _update_user_info(self, context_memory):
        """Merge facts from the model's ``context_memory`` into ``user_info``.

        A string is scanned for a name and a country; a dict is merged key
        by key, skipping ``None`` values so known facts are not erased.
        Any other type is ignored.
        """
        if isinstance(context_memory, str):
            name_match = self._NAME_RE.search(context_memory)
            if name_match:
                self.user_info["name"] = name_match.group(1)
            country_match = self._COUNTRY_RE.search(context_memory)
            if country_match:
                self.user_info["country"] = country_match.group(1).strip()
        elif isinstance(context_memory, dict):
            for key in self.user_info:
                value = context_memory.get(key)
                if value is not None:
                    self.user_info[key] = value

    def clean_text_for_tts(self, text):
        """Return ``text`` prepared for speech synthesis.

        Removes emoji, collapses runs of whitespace, and drops the first
        word when it is immediately repeated (compared case-insensitively).
        ``None`` is treated as an empty string.
        """
        text = "" if text is None else str(text)
        # Remove emojis and special characters that might cause TTS issues
        text = self._EMOJI_RE.sub("", text)
        # Remove extra spaces and newlines
        text = re.sub(r"\s+", " ", text).strip()
        # Remove duplicate words at the beginning
        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = " ".join(words[1:])
        return text
def convert_audio_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` to English text.

    Files whose name does not end in ``.wav`` (any letter case) are first
    converted to WAV with pydub. The conversion goes to a temporary file
    that is removed afterwards, so nothing is written beside the upload
    and no converted copies accumulate.

    Returns:
        The recognized text, or ``None`` if conversion or recognition fails.
    """
    temp_wav = None
    try:
        if not audio_path.lower().endswith('.wav'):
            fd, temp_wav = tempfile.mkstemp(suffix='.wav')
            # Only the path is needed; pydub reopens the file itself.
            os.close(fd)
            AudioSegment.from_file(audio_path).export(temp_wav, format='wav')
            audio_path = temp_wav
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        # Best-effort cleanup of the converted copy.
        if temp_wav is not None:
            try:
                os.remove(temp_wav)
            except OSError:
                pass
def text_to_speech(text):
    """Synthesize ``text`` through the remote TTS client.

    Returns:
        The first element of the client's result when it is a list or
        tuple, otherwise the result itself. ``None`` is returned when
        ``text`` is empty or blank, or when the remote call fails.
    """
    # Nothing to say: skip the remote call entirely.
    if not text or not str(text).strip():
        return None
    try:
        result = TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
            api_name="/text_to_speech_app",
        )
        return result[0] if isinstance(result, (list, tuple)) else result
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None
# Module-level tutor instance used by initialize_chat() and process_audio();
# clear_chat() replaces it with a fresh one.
tutor = EnglishTutor()
def initialize_chat():
    """Build the opening state of the UI.

    Returns:
        A 4-tuple ``(history, welcome_audio, transcript, corrections)``:
        a one-message chat history holding the greeting, the synthesized
        greeting audio (or ``None``), the transcript text, and an empty
        corrections string. If anything fails, a fixed greeting is used
        and no audio is returned.
    """
    fallback_welcome = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
    try:
        welcome = tutor.get_welcome_message()
        clean_welcome = tutor.clean_text_for_tts(welcome)
        welcome_audio = text_to_speech(clean_welcome)
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        welcome, welcome_audio = fallback_welcome, None
    history = [{"role": "assistant", "content": welcome}]
    # The speaker prefix was mis-encoded in the source; restored as an emoji.
    return history, welcome_audio, f"🤖 Sam: {welcome}", ""
def _stringify_feedback(value):
    """Render a model-supplied feedback field (str, list or dict) as text."""
    if isinstance(value, dict):
        return "\n".join(f"• '{k}' → '{v}'" for k, v in value.items())
    if isinstance(value, (list, tuple)):
        return "\n".join(f"• {item}" for item in value)
    return str(value).strip()


def _build_feedback(bot_response, user_info):
    """Assemble the "Learning Corner" text for one tutor reply."""
    sections = []

    # Always show current level
    level = bot_response.get("level_assessment")
    if level:
        # str() guards against a non-string level from the model.
        sections.append(f"📊 **Current Level:** {str(level).title()}")

    # Show corrections if available
    corrections_text = _stringify_feedback(bot_response.get("corrections") or "")
    if corrections_text:
        sections.append(f"✏️ **Grammar Corrections:**\n{corrections_text}")

    # Show vocabulary if available
    vocab_text = _stringify_feedback(bot_response.get("vocabulary") or "")
    if vocab_text:
        sections.append(f"📚 **Vocabulary Suggestions:**\n{vocab_text}")

    # Show encouragement
    if bot_response.get("encouragement"):
        sections.append(f"💡 **Encouragement:**\n{bot_response['encouragement']}")

    # Show the learner profile once a name is known
    if user_info.get("name"):
        profile = [
            f"{label}: {user_info[key]}"
            for label, key in (("Name", "name"), ("Country", "country"), ("Level", "level"))
            if user_info.get(key)
        ]
        sections.append(f"👤 **Your Profile:**\n{' | '.join(profile)}")

    # If still no corrections, show a default message
    if not sections:
        sections.append("🎯 **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon.")

    return "\n\n".join(sections)


def process_audio(audio, history, transcript, corrections):
    """Handle one recorded learner turn.

    Transcribes ``audio``, asks the tutor for a reply, synthesizes that
    reply, and extends the chat history, transcript and corrections text.

    Args:
        audio: Path of the recorded audio file, or ``None``.
        history: Current chat messages (list of role/content dicts) or ``None``.
        transcript: Current transcript text.
        corrections: Current corrections text.

    Returns:
        ``(history, audio_response, transcript, corrections)``. When there
        is no audio, nothing was recognized, or an error occurs, the inputs
        are returned unchanged with ``None`` as the audio response.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections
        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections

        bot_response = tutor.get_bot_response(user_message)

        # Main response, then the follow-up question, then the encouragement.
        # str() keeps a non-string field from aborting the whole turn.
        spoken_fields = (
            bot_response.get("response", ""),
            bot_response.get("next_question"),
            bot_response.get("encouragement"),
        )
        main_response = " ".join(str(part) for part in spoken_fields if part)

        # Clean text for TTS
        clean_response = tutor.clean_text_for_tts(main_response)
        audio_response = text_to_speech(clean_response)

        # Work on a copy so the caller's list stays untouched if a later
        # step fails and the original history is returned.
        updated_history = list(history or [])
        updated_history.append({"role": "user", "content": user_message})
        updated_history.append({"role": "assistant", "content": main_response})

        # Update transcript
        new_transcript = (transcript or "") + f"\n\n🎤 You: {user_message}\n🤖 Sam: {main_response}"

        # Debug: Print the bot response to see what we're getting
        print(f"DEBUG - Bot response keys: {bot_response.keys()}")
        print(f"DEBUG - Corrections: '{bot_response.get('corrections', 'NOT FOUND')}'")
        print(f"DEBUG - Vocabulary: '{bot_response.get('vocabulary', 'NOT FOUND')}'")
        print(f"DEBUG - Level: '{bot_response.get('level_assessment', 'NOT FOUND')}'")

        new_correction_text = _build_feedback(bot_response, tutor.user_info)

        # Label each feedback entry with the first 30 characters of the turn.
        snippet = f"[{user_message[:30]}...]" if len(user_message) > 30 else f"[{user_message}]"
        entry = f"--- Latest Response {snippet} ---\n{new_correction_text}"
        new_corrections = f"{corrections}\n\n{entry}" if corrections else entry

        return updated_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections
def submit_recording(audio, history, transcript, corrections):
    """Forward a finished recording to process_audio and return its result."""
    return process_audio(
        audio=audio,
        history=history,
        transcript=transcript,
        corrections=corrections,
    )
def clear_chat():
    """Replace the module-level tutor with a fresh one and restart the chat.

    Returns the same 4-tuple as initialize_chat().
    """
    global tutor
    tutor = EnglishTutor()
    fresh_state = initialize_chat()
    return fresh_state
# UI layout and event wiring. Labels that were mis-encoded in the source have
# been restored with emoji inferred from the surviving bytes and the context.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant with Sam")
    gr.Markdown("🎤 **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!")
    with gr.Row():
        with gr.Column(scale=3):
            # avatar_images is intentionally omitted: Gradio expects image
            # file paths or URLs there, and the source passed emoji strings,
            # which are neither.
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                type='messages',
            )
            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        label="🎙️ Record your voice (auto-submits when finished)",
                        type="filepath",
                        show_label=True,
                    )
                with gr.Column(scale=1):
                    audio_output = gr.Audio(
                        label="🔊 Sam's response",
                        type="filepath",
                        show_label=True,
                        autoplay=True,
                    )
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Live Transcript")
            transcript_display = gr.Textbox(
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                placeholder="Your conversation will appear here...",
                container=True,
            )
            gr.Markdown("### 📚 Learning Corner")
            corrections_display = gr.Textbox(
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                container=True,
            )
    with gr.Row():
        clear_btn = gr.Button("🔄 Start New Conversation", variant="secondary", size="lg")
    gr.Markdown("💡 **Tip**: Sam will actively guide the conversation and provide personalized feedback!")

    # Auto-submit when audio is recorded
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )
    # Reset the tutor and show a fresh greeting.
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )
    # Greet the learner as soon as the page loads.
    demo.load(
        initialize_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )

if __name__ == "__main__":
    demo.launch()