"""Voice-enabled English tutor app: Whisper (STT) -> Qwen 2.5 (LLM) -> gTTS (TTS), served via Gradio."""

import os
import tempfile

import gradio as gr
import torch
from transformers import pipeline
import whisper
from gtts import gTTS

# --- 1. LOAD THE OPEN MODEL (Qwen) ---
print("⏳ Loading Qwen 2.5...")

# NOTE: device_map="auto" lets accelerate place the model (GPU if available),
# so no manual device selection is needed here.
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-7B-Instruct",
    model_kwargs={
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    },
    device_map="auto",
)

# Load Whisper (Ears) — English-only base model keeps transcription fast.
whisper_model = whisper.load_model("base.en")
print("✅ Model Loaded!")

# --- 2. TEACHER PERSONA ---
SYSTEM_PROMPT = """
You are a friendly, encouraging English tutor.
- Your goal: Help the user practice speaking English.
- Level: Intermediate (B1).
- Keep responses short (1-3 sentences).
- If the user makes a mistake, kindly correct it.
"""


# --- 3. PROCESSING FUNCTIONS ---
def text_to_speech(text):
    """Convert AI reply text to an mp3 file and return its path.

    Returns None for empty input or on any gTTS/network failure so the
    caller can simply skip audio playback.
    """
    try:
        if not text:
            return None
        tts = gTTS(text, lang='en')
        # Unique temp file avoids same-second filename collisions and
        # keeps generated audio out of the working directory.
        fd, filename = tempfile.mkstemp(prefix="response_", suffix=".mp3")
        os.close(fd)
        tts.save(filename)
        return filename
    except Exception as e:
        # Best-effort: a TTS failure should not break the chat loop.
        print(f"TTS Error: {e}")
        return None


def generate_response(message, history):
    """Use the Qwen pipeline to generate a tutor reply.

    Args:
        message: The latest user utterance (str).
        history: List of (user_msg, bot_msg) tuples from the Gradio chatbot.

    Returns:
        The assistant's reply text (str).
    """
    # Format messages for Qwen's chat template.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Only the last 3 exchanges are replayed to bound prompt length.
    for user_msg, bot_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    outputs = pipe(
        messages,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )
    # Pipeline returns the full conversation; the last entry is the new reply.
    return outputs[0]["generated_text"][-1]["content"]


def conversation_logic(audio_path, text_input, history):
    """Main loop: Listen -> Think -> Speak.

    Args:
        audio_path: Filepath of recorded microphone audio, or None.
        text_input: Typed fallback text, or "".
        history: Chatbot history as a list of (user, bot) tuples.

    Returns:
        (updated history, path to reply audio or None, "" to clear the textbox)
    """
    user_text = ""

    # 1. Get Input — microphone takes priority over typed text.
    if audio_path:
        result = whisper_model.transcribe(audio_path)
        user_text = result["text"]
    elif text_input:
        user_text = text_input
    else:
        return history, None, ""

    # Whisper can return whitespace-only output for silence; treat as no input.
    if not user_text.strip():
        return history, None, ""

    # 2. Get Intelligence
    ai_response = generate_response(user_text, history)

    # 3. Speak Output
    ai_audio = text_to_speech(ai_response)

    # 4. Update Chat (Standard tuple format)
    history.append((user_text, ai_response))
    return history, ai_audio, ""


# --- 4. BUILD INTERFACE ---
with gr.Blocks(title="Qwen English Tutor") as demo:
    gr.Markdown("# 🗣️ English Tutor (Powered by Qwen 2.5)")
    gr.Markdown("No API Keys required! Run completely open source.")

    # FIXED: Removed 'type="messages"' to support older Gradio versions
    chatbot = gr.Chatbot(label="Conversation")

    with gr.Row():
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        text_in = gr.Textbox(label="⌨️ Type")

    submit_btn = gr.Button("Send", variant="primary")
    audio_out = gr.Audio(label="Teacher's Voice", autoplay=True)

    submit_btn.click(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in],
    )

# Guard launch so importing this module (e.g. for tests) doesn't start a server.
if __name__ == "__main__":
    demo.launch()