Spaces:
Runtime error
Runtime error
import os
import io

import gradio as gr
from google import genai
from google.genai import types
from TTS.api import TTS

# --- TTS Setup ---
# Coqui XTTS v2 multilingual model; CPU-only (no GPU on the free Spaces tier).
model_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)

# --- API Keys (set as environment variables for security!) ---
genai_api_key = os.environ.get("GOOGLE_API_KEY")  # Gemini API key; None if unset
client = genai.Client(api_key=genai_api_key)

# --- Voice ---
# NOTE(review): "p225" is a VCTK speaker id, not an ElevenLabs voice name;
# the current XTTS call path does not actually use this value.
voice = "p225"

# --- Language Tutor Parameters ---
target_language = "Arabic"
difficulty = 1  # 1 = Easy, 2 = Medium, 3 = Hard

# --- Functions ---
def generate_question(difficulty):
    """Ask Gemini for a single practice question in the target language.

    difficulty: 1 = Easy, 2 = Medium, 3 = Hard (interpolated into the prompt).
    Returns the generated question text, stripped of surrounding whitespace.
    """
    prompt = f"Generate a simple {target_language} question for a language learner at difficulty level {difficulty}. Just the question, no extra text."
    # High temperature so repeated clicks produce varied questions.
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=types.GenerateContentConfig(temperature=0.95),
    )
    return reply.text.strip()
def evaluate_answer(question, answer):
    """Grade a learner's answer with Gemini.

    Sends the question/answer pair to the model and requests concise
    feedback (grammar, vocabulary, fluency) plus a 1-5 score.
    Returns the feedback text, stripped of surrounding whitespace.
    """
    prompt = f"You are a Arabic language tutor. Evaluate the following answer to the question: '{question}'. Answer: '{answer}'. Provide feedback on grammar, vocabulary, and fluency. Keep the feedback concise (under 50 words). Also, give a score from 1-5 (1 being very poor, 5 being excellent)."
    # Low temperature: grading should be as deterministic as possible.
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=types.GenerateContentConfig(temperature=0.1),
    )
    return reply.text.strip()
def text_to_speech(text, voice):
    """Synthesize `text` with the module-level Coqui XTTS model.

    Bug fix: the original body called an undefined name ``tts_model``; the
    model is bound at module level as ``model_tts``, which is what is used
    here. It also passed ``language="Arabic"``, but XTTS expects an ISO
    639-1 code such as "ar".

    Parameters:
        text: The string to speak.
        voice: Unused by the current XTTS call path; kept so the call
            signature stays compatible with callers.

    Returns:
        The path of the WAV file written by ``tts_to_file``.
    """
    # Map the human-readable language name to the ISO code XTTS expects.
    language_codes = {"Arabic": "ar", "English": "en"}
    lang = language_codes.get(target_language, "en")
    # NOTE(review): xtts_v2 normally also wants a speaker or speaker_wav;
    # confirm the default speaker works in the deployment environment.
    wav = model_tts.tts_to_file(text=text, language=lang)
    return wav
def transcribe_audio(audio_file):
    """Transcribe a recorded answer with the SpeechRecognition library.

    Bug fix: ``Recognizer`` and ``AudioFile`` were used without being
    imported (the top-of-file import is commented out), which raised a
    NameError on every call. The import is done here at function scope so
    the rest of the module still imports even if the package is missing.

    Parameters:
        audio_file: Path to a WAV/AIFF/FLAC file readable by AudioFile.

    Returns:
        The recognized text, or an "Error transcribing audio: ..." string
        on any failure (the UI displays whatever string comes back).
    """
    from speech_recognition import Recognizer, AudioFile

    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # recognize_google expects a BCP-47 tag such as "ar-SA", not the
        # English language name — map it; fall back to the raw value.
        language_codes = {"Arabic": "ar-SA", "English": "en-US"}
        text = recognizer.recognize_google(
            audio, language=language_codes.get(target_language, target_language)
        )
        return text
    except Exception as e:  # broad by design: always hand the UI a string
        return f"Error transcribing audio: {e}"
def run_tutor():
    """Produce a fresh question for the learner.

    Returns a (question_audio, question_text) pair: the synthesized audio
    file path and the question string.
    """
    new_question = generate_question(difficulty)
    audio_path = text_to_speech(new_question, voice)
    return audio_path, new_question
def process_answer(audio_file, question, question_audio):
    """Transcribe the spoken answer, grade it, and voice the feedback.

    `question_audio` is accepted only for interface compatibility with the
    Gradio wiring and is not used. Returns (feedback_audio, feedback_text).
    """
    transcript = transcribe_audio(audio_file)
    feedback_text = evaluate_answer(question, transcript)
    spoken_feedback = text_to_speech(feedback_text, voice)
    return spoken_feedback, feedback_text
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
    # Question row: button to request a question, plus its audio and text.
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    # Feedback row: spoken and written evaluation of the learner's answer.
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    # NOTE(review): transcribe_audio expects a file path — confirm this
    # component yields one (type="filepath") in the installed Gradio version.
    mic_input = gr.Audio(label="Speak Your Answer")
    # Clicking "Generate Question" fills the question audio + text outputs.
    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output]
    )
    # Any new recording triggers transcription + evaluation of the answer.
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output]
    )
demo.launch()
# NOTE(review): dead code below — an earlier streaming (yield-based) version
# of the interface, kept inside a module-level string literal so it never
# runs. Consider deleting it once the current interface is stable.
'''
def run_tutor(audio_file):
    """Main function to run the tutor."""
    question = generate_question(difficulty)
    question_audio = text_to_speech(question, voice)
    # Display the question in the interface
    yield question_audio, question, None, None
    # Transcribe the user's answer
    user_answer = transcribe_audio(audio_file)
    # Evaluate the answer
    feedback = evaluate_answer(question, user_answer)
    feedback_audio = text_to_speech(feedback, voice)
    yield None, None, feedback_audio, feedback
    #return question_audio, feedback_audio, question, user_answer, feedback

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
#    with gr.Row():
#        question_audio_output = gr.Audio(label="Question")
#        feedback_audio_output = gr.Audio(label="Feedback")
#    with gr.Row():
#        feedback_audio_output = gr.Audio(label="Feedback")
#        feedback_text_output = gr.Textbox(label="Feedback")
    with gr.Row():
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    mic_input = gr.Audio(label="Speak Your Answer")
    generate_button = gr.Button("Generate Question")
    generate_button.click(
        fn=run_tutor,
        inputs=mic_input,
        outputs=[question_audio_output, question_text_output, feedback_audio_output, feedback_text_output]
    )
    #NameError: name 'question_text_output' is not defined. Did you mean: 'question_audio_output'?
    #
    # question_text_output = gr.Textbox(label="Question Text")
    # answer_text_output = gr.Textbox(label="Your Answer")
    # feedback_text_output = gr.Textbox(label="Feedback")
    # mic_input = gr.Audio(label="Speak Your Answer")#
    # mic_input.change(
    #     fn=run_tutor,
    #     inputs=mic_input,
    #     outputs=[question_audio_output, feedback_audio_output, question_text_output, answer_text_output, feedback_text_output]
    # )
demo.launch()
'''