"""Adaptive spoken-language tutor (Arabic) built on Gradio.

Pipeline: Gemini generates a question -> Coqui XTTS speaks it aloud ->
the learner answers by microphone -> Google Speech Recognition transcribes
the answer -> Gemini evaluates it and the feedback is spoken back.
"""

import os

import gradio as gr
from google import genai
from google.genai import types
from speech_recognition import AudioFile, Recognizer  # required by transcribe_audio
from TTS.api import TTS

# --- TTS setup (Coqui XTTS v2, CPU only) ---
model_tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
    gpu=False,
)

# --- API keys (set as environment variables for security!) ---
genai_api_key = os.environ.get("GOOGLE_API_KEY")  # Gemini API key
client = genai.Client(api_key=genai_api_key)

# --- Voice / language configuration ---
# NOTE(review): "p225" is a VCTK speaker id left over from an earlier backend;
# XTTS v2 uses its own named speakers — confirm before relying on it.
voice = "p225"

target_language = "Arabic"       # human-readable name, interpolated into prompts
target_language_code = "ar"      # language code expected by XTTS and Google STT
difficulty = 1                   # 1 = Easy, 2 = Medium, 3 = Hard


def generate_question(difficulty):
    """Generate one target-language question at the given difficulty via Gemini.

    Args:
        difficulty: integer difficulty level (1-3) interpolated into the prompt.

    Returns:
        The question text, stripped of surrounding whitespace.
    """
    prompt = (
        f"Generate a simple {target_language} question for a language learner "
        f"at difficulty level {difficulty}. Just the question, no extra text."
    )
    # High temperature so repeated clicks yield varied questions.
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=types.GenerateContentConfig(temperature=0.95),
    )
    return response.text.strip()


def evaluate_answer(question, answer):
    """Evaluate the learner's answer with Gemini and return concise feedback.

    Args:
        question: the question that was asked.
        answer: the learner's (transcribed) answer.

    Returns:
        Feedback text including a 1-5 score, stripped of whitespace.
    """
    prompt = (
        f"You are a Arabic language tutor. "
        f"Evaluate the following answer to the question: '{question}'. "
        f"Answer: '{answer}'. Provide feedback on grammar, vocabulary, and "
        f"fluency. Keep the feedback concise (under 50 words). Also, give a "
        f"score from 1-5 (1 being very poor, 5 being excellent)."
    )
    # Low temperature: grading should be deterministic, not creative.
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=types.GenerateContentConfig(temperature=0.1),
    )
    return response.text.strip()


def text_to_speech(text, voice):
    """Synthesize *text* to a WAV file with XTTS and return the file path.

    Fix: the original referenced an undefined name ``tts_model``; the model
    object is bound as ``model_tts``.  Also pass the language *code* ("ar"),
    which is what XTTS expects, rather than the English word "Arabic".

    NOTE(review): XTTS v2 is multi-speaker, so a ``speaker=`` or
    ``speaker_wav=`` argument may be required; the ``voice`` parameter is
    currently unused pending that decision — TODO confirm.

    Args:
        text: text to speak.
        voice: speaker identifier (currently unused, kept for API stability).

    Returns:
        Path to the generated WAV file (playable directly by gr.Audio).
    """
    return model_tts.tts_to_file(text=text, language=target_language_code)


def transcribe_audio(audio_file):
    """Transcribe a recorded answer using Google's speech-recognition service.

    Args:
        audio_file: path to a WAV/AIFF/FLAC file (the mic component is
            configured with ``type="filepath"`` to guarantee this).

    Returns:
        The transcription, or an ``"Error transcribing audio: ..."`` string on
        failure so the UI can display the problem instead of crashing.
    """
    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # recognize_google expects a language code ("ar"), not "Arabic".
        return recognizer.recognize_google(audio, language=target_language_code)
    except Exception as e:  # best-effort: surface the error as feedback text
        return f"Error transcribing audio: {e}"


def run_tutor():
    """Generate a new question and return (question audio path, question text)."""
    question = generate_question(difficulty)
    question_audio = text_to_speech(question, voice)
    return question_audio, question


def process_answer(audio_file, question, question_audio):
    """Transcribe and grade the learner's answer.

    Args:
        audio_file: path to the recorded answer.
        question: the question text (from the question textbox).
        question_audio: passed through by the Gradio wiring; not used here.

    Returns:
        Tuple of (feedback audio path, feedback text).
    """
    user_answer = transcribe_audio(audio_file)
    feedback = evaluate_answer(question, user_answer)
    feedback_audio = text_to_speech(feedback, voice)
    return feedback_audio, feedback


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    # type="filepath" so transcribe_audio receives a path it can open with
    # AudioFile (the default numpy payload would break it).
    mic_input = gr.Audio(label="Speak Your Answer", type="filepath")

    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output],
    )
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output],
    )

if __name__ == "__main__":
    # Guarded so importing this module doesn't start a web server.
    demo.launch()