File size: 3,447 Bytes
e880f41
362fe11
8d796c9
da608a6
 
362fe11
f7ddde2
 
8d796c9
b6154d8
f7ddde2
b6154d8
8d796c9
362fe11
8d796c9
362fe11
8d796c9
 
b6154d8
8d796c9
 
b6154d8
362fe11
 
 
da608a6
 
8d796c9
362fe11
8d796c9
da608a6
f7ddde2
8d796c9
f7ddde2
8d796c9
 
da608a6
 
8d796c9
da608a6
 
8d796c9
f7ddde2
 
8d796c9
f7ddde2
 
 
 
 
 
 
 
8d796c9
 
da608a6
b6154d8
362fe11
da608a6
b6154d8
362fe11
 
 
da608a6
362fe11
 
b6154d8
8d796c9
362fe11
8d796c9
362fe11
 
 
 
b6154d8
f7ddde2
 
8d796c9
da608a6
f7ddde2
 
8d796c9
f7ddde2
da608a6
f7ddde2
da608a6
f7ddde2
da608a6
f7ddde2
 
 
 
 
b6154d8
8d796c9
f7ddde2
8d796c9
362fe11
da608a6
b6154d8
f7ddde2
8d796c9
da608a6
 
8d796c9
da608a6
8d796c9
 
 
da608a6
b6154d8
 
da608a6
 
362fe11
 
da608a6
 
8d796c9
da608a6
 
f7ddde2
 
 
da608a6
e880f41
da608a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
import torch
from transformers import pipeline
import whisper
from gtts import gTTS
import os
import time

# --- 1. LOAD THE OPEN MODEL (Qwen) ---
# Module-level setup: downloads/loads the LLM and the speech-to-text model
# once at startup so each chat turn only runs inference.
print("⏳ Loading Qwen 2.5...")

# Setup device (GPU or CPU)
# NOTE(review): `device` is computed but never used below — the pipeline is
# placed via device_map="auto" instead. Kept as-is; confirm before removing.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Chat LLM: Qwen 2.5 7B Instruct in fp16; device_map="auto" lets accelerate
# shard/place the weights across available devices automatically.
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-7B-Instruct",
    model_kwargs={
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    },
    device_map="auto",
)

# Load Whisper (Ears)
# "base.en" = small English-only Whisper checkpoint used for transcription.
whisper_model = whisper.load_model("base.en")

print("✅ Model Loaded!")

# --- 2. TEACHER PERSONA ---
# System prompt prepended to every conversation sent to the LLM; it fixes the
# tutor persona, target level, and response length. (String content is part of
# model behavior — do not edit casually.)
SYSTEM_PROMPT = """
You are a friendly, encouraging English tutor.
- Your goal: Help the user practice speaking English.
- Level: Intermediate (B1).
- Keep responses short (1-3 sentences).
- If the user makes a mistake, kindly correct it.
"""

# --- 3. PROCESSING FUNCTIONS ---

def text_to_speech(text):
    """Convert the tutor's text reply to an MP3 file using gTTS.

    Args:
        text: Reply text to vocalize. Falsy input is a no-op.

    Returns:
        Path to the generated MP3 file, or None on empty input or any
        gTTS/network failure (best-effort: errors are logged, never raised,
        so a TTS outage cannot break the chat loop).
    """
    # Guard clause outside the try: an empty reply is not an error.
    if not text:
        return None
    try:
        # FIX: removed dead assignment `clean_text = text.replace(...)` —
        # it was computed but never passed to gTTS.
        tts = gTTS(text, lang='en')
        # Timestamped filename avoids clobbering an earlier reply that the
        # UI audio player may still be serving.
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None

def generate_response(message, history):
    """Generate one tutor reply with the Qwen text-generation pipeline.

    Args:
        message: The user's latest utterance.
        history: List of (user, assistant) tuples; only the last 3 turns
            are forwarded to keep the prompt short.

    Returns:
        The assistant's reply text.
    """
    # Build the chat context: persona first, recent turns, then the new message.
    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history[-3:]:
        conversation += [
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ]
    conversation.append({"role": "user", "content": message})

    # Sampled decoding keeps the tutor's phrasing varied between turns.
    result = pipe(
        conversation,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )

    # The pipeline echoes the whole chat; the final entry is the new reply.
    return result[0]["generated_text"][-1]["content"]

def conversation_logic(audio_path, text_input, history):
    """One chat turn: transcribe/accept input, generate a reply, voice it.

    Args:
        audio_path: Path to a recorded clip, or None/"" if the user typed.
        text_input: Typed fallback message.
        history: Chatbot state as a list of (user, assistant) tuples;
            mutated in place with the new turn.

    Returns:
        (updated history, path to reply audio or None, "" to clear the textbox).
    """
    # 1. Listen — microphone input takes precedence over typed text.
    if audio_path:
        user_text = whisper_model.transcribe(audio_path)["text"]
    elif text_input:
        user_text = text_input
    else:
        user_text = ""

    # Nothing usable was said or typed: leave the chat untouched.
    if not user_text.strip():
        return history, None, ""

    # 2. Think
    ai_response = generate_response(user_text, history)

    # 3. Speak (may be None if TTS failed — the UI just shows text then).
    ai_audio = text_to_speech(ai_response)

    # 4. Record the turn in the legacy tuple format the Chatbot expects.
    history.append((user_text, ai_response))

    return history, ai_audio, ""

# --- 4. BUILD INTERFACE ---
# Gradio UI wiring. `demo.launch()` at the bottom blocks and serves the app,
# so importing this module starts the server as a side effect.

with gr.Blocks(title="Qwen English Tutor") as demo:
    gr.Markdown("# 🗣️ English Tutor (Powered by Qwen 2.5)")
    gr.Markdown("No API Keys required! Run completely open source.")
    
    # FIXED: Removed 'type="messages"' to support older Gradio versions
    # (the default tuple format matches what conversation_logic appends).
    chatbot = gr.Chatbot(label="Conversation")
    
    with gr.Row():
        # type="filepath" hands conversation_logic a path Whisper can read.
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        text_in = gr.Textbox(label="⌨️ Type")
        
    submit_btn = gr.Button("Send", variant="primary")
    # autoplay so the tutor's voice plays as soon as the reply is ready.
    audio_out = gr.Audio(label="Teacher's Voice", autoplay=True)

    # Third output is "" so the textbox clears after each turn.
    submit_btn.click(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in]
    )

demo.launch()