|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import pipeline |
|
|
import whisper |
|
|
from gtts import gTTS |
|
|
import os |
|
|
import time |
|
|
|
|
|
|
|
|
print("⏳ Loading Qwen 2.5...")

# Prefer GPU when available.
# NOTE(review): `device` is computed but never used below — the pipeline
# relies on device_map="auto" for placement; confirm before removing.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Qwen 2.5 7B Instruct chat model wrapped in a text-generation pipeline.
# float16 halves weight memory vs float32; low_cpu_mem_usage reduces peak
# RAM while loading; device_map="auto" lets accelerate place the layers.
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-7B-Instruct",
    model_kwargs={
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    },
    device_map="auto",
)

# English-only Whisper "base" model used for speech-to-text transcription.
whisper_model = whisper.load_model("base.en")

print("✅ Model Loaded!")

# System persona prepended to every conversation sent to the model
# (see generate_response): keeps the tutor friendly, brief, and corrective.
SYSTEM_PROMPT = """
You are a friendly, encouraging English tutor.
- Your goal: Help the user practice speaking English.
- Level: Intermediate (B1).
- Keep responses short (1-3 sentences).
- If the user makes a mistake, kindly correct it.
"""
|
|
|
|
|
|
|
|
|
|
|
def text_to_speech(text):
    """Convert the tutor's text reply to an MP3 file with gTTS.

    Args:
        text: The reply to speak. Falsy input yields no audio.

    Returns:
        Path of the generated MP3 (timestamped to avoid collisions),
        or None when there is nothing to say or synthesis fails.
    """
    try:
        if not text:
            return None
        clean_text = text.replace("failed", "")
        # Bug fix: clean_text was previously computed but never used —
        # gTTS was called with the raw `text`, skipping the cleanup.
        tts = gTTS(clean_text, lang='en')
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        # Best-effort: TTS failure must not crash the conversation loop.
        print(f"TTS Error: {e}")
        return None
|
|
|
|
|
def generate_response(message, history):
    """Produce the tutor's next reply via the Qwen chat pipeline.

    Args:
        message: The user's latest utterance.
        history: List of (user, assistant) turn pairs; only the last
            three exchanges are replayed to keep the prompt short.

    Returns:
        The assistant's reply text.
    """
    # Persona first, then the recent conversation, then the new message.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history[-3:]:
        messages += [
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ]
    messages.append({"role": "user", "content": message})

    result = pipe(
        messages,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )

    # The pipeline echoes the whole conversation; the final entry is the
    # newly generated assistant turn.
    return result[0]["generated_text"][-1]["content"]
|
|
|
|
|
def conversation_logic(audio_path, text_input, history):
    """One conversational round trip: Listen -> Think -> Speak.

    Args:
        audio_path: Recorded-audio file path (takes priority), or None.
        text_input: Typed fallback message.
        history: Running list of (user, assistant) pairs; mutated in place.

    Returns:
        Tuple of (updated history, reply audio path or None, "" to clear
        the textbox).
    """
    # Prefer the microphone; fall back to the textbox; bail out on neither.
    if audio_path:
        user_text = whisper_model.transcribe(audio_path)["text"]
    elif text_input:
        user_text = text_input
    else:
        return history, None, ""

    # Whitespace-only transcriptions/messages are treated as no input.
    if not user_text.strip():
        return history, None, ""

    reply = generate_response(user_text, history)
    reply_audio = text_to_speech(reply)

    history.append((user_text, reply))
    return history, reply_audio, ""
|
|
|
|
|
|
|
|
|
|
|
# Build and launch the UI. Component creation order defines the layout:
# header, chat transcript, mic/text inputs side by side, send button,
# then the autoplaying voice reply.
with gr.Blocks(title="Qwen English Tutor") as app:
    gr.Markdown("# 🗣️ English Tutor (Powered by Qwen 2.5)")
    gr.Markdown("No API Keys required! Run completely open source.")

    chat_window = gr.Chatbot(label="Conversation")

    with gr.Row():
        mic_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        typed_input = gr.Textbox(label="⌨️ Type")

    send_button = gr.Button("Send", variant="primary")
    voice_output = gr.Audio(label="Teacher's Voice", autoplay=True)

    # Wire the round trip: inputs feed conversation_logic; the returned
    # empty string clears the textbox for the next turn.
    send_button.click(
        fn=conversation_logic,
        inputs=[mic_input, typed_input, chat_window],
        outputs=[chat_window, voice_output, typed_input],
    )

app.launch()