|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import pipeline |
|
|
import whisper |
|
|
from gtts import gTTS |
|
|
import os |
|
|
import time |
|
|
|
|
|
|
|
|
|
|
|
# Small instruct model chosen so the app runs on CPU-only machines.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

print(f"⏳ Loading {MODEL_ID}...")

# Chat-style text-generation pipeline; low_cpu_mem_usage streams weights
# during load to reduce peak RAM, and device_map pins inference to CPU.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    model_kwargs={"low_cpu_mem_usage": True},
    device_map="cpu",
)

# Smallest English-only Whisper checkpoint — fast transcription, lower accuracy.
whisper_model = whisper.load_model("tiny.en")

print("✅ Systems Ready.")
|
|
|
|
|
|
|
|
# System prompt injected as the first message of every LLM call (see
# generate_response): defines the tutor persona and forces short,
# correction-first replies. Do not reword casually — the "Correction:" prefix
# is part of the expected output format.
SYSTEM_PROMPT = """
You are a helpful English teacher.
1. If the user makes a grammar mistake, ALWAYS correct it first. Start with "Correction: [Correct Sentence]".
2. Then, answer the user's question or continue the chat.
3. Keep your own English simple (A2 level).
4. Keep responses short (max 2 sentences).
"""
|
|
|
|
|
|
|
|
|
|
|
def text_to_speech(text):
    """Synthesize *text* to an MP3 file via gTTS.

    Returns the generated filename, or None when *text* is empty or
    synthesis fails. Best-effort by design: audio is optional, so any
    TTS/network error must never crash the chat turn.
    """
    # Empty/None input: nothing to say, no file to make.
    if not text:
        return None
    try:
        # Strip markdown markers the model may emit, so gTTS doesn't
        # read "asterisk" / "hash" aloud.
        text_clean = text.replace("*", "").replace("#", "")

        tts = gTTS(text_clean, lang='en')
        # Timestamped name avoids clobbering a file the audio player
        # may still be serving from a previous turn.
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        return filename
    except Exception:
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # still propagate. Failure just means "no audio this turn".
        return None
|
|
|
|
|
def generate_response(message, history):
    """Return the tutor's reply to *message*, conditioned on recent history.

    Only the two most recent (user, assistant) exchanges are replayed to
    keep the prompt short for the small model.
    """
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    for prev_user, prev_bot in history[-2:]:
        chat += [
            {"role": "user", "content": prev_user},
            {"role": "assistant", "content": prev_bot},
        ]
    chat.append({"role": "user", "content": message})

    generated = pipe(
        chat,
        max_new_tokens=60,
        do_sample=True,
        temperature=0.6,
    )

    # The pipeline returns the full message list; the newly generated
    # assistant turn is the final entry.
    return generated[0]["generated_text"][-1]["content"]
|
|
|
|
|
def conversation_logic(audio_path, text_input, history):
    """Run one chat turn: resolve the user's input, reply, and speak it.

    Returns (updated history, reply audio path or None, "") — the empty
    string clears the textbox component for the next turn.
    """
    # Voice input takes priority over typed text when both are present.
    if audio_path:
        user_text = whisper_model.transcribe(audio_path)["text"]
    elif text_input:
        user_text = text_input
    else:
        # Neither input supplied — nothing to do.
        return history, None, ""

    # Whisper can return pure whitespace for silence; treat as no input.
    if not user_text.strip():
        return history, None, ""

    ai_response = generate_response(user_text, history)
    ai_audio = text_to_speech(ai_response)

    history.append((user_text, ai_response))
    return history, ai_audio, ""
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: chatbot transcript, mic + textbox inputs, spoken reply output.
with gr.Blocks(title="Grammar Tutor") as demo:
    gr.Markdown("# ⚡ Grammar & Speaking Tutor")  # was an f-string with no placeholders
    gr.Markdown("I will correct your mistakes and chat with you.")

    chatbot = gr.Chatbot(label="Conversation")

    with gr.Row():
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        text_in = gr.Textbox(label="⌨️ Type")

    submit_btn = gr.Button("Send", variant="primary")
    # autoplay so the teacher's reply is spoken without an extra click.
    audio_out = gr.Audio(label="Teacher's Voice", autoplay=True)

    # Wire both the Send button and pressing Enter in the textbox to the
    # same handler — previously Enter did nothing.
    submit_btn.click(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in],
    )
    text_in.submit(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in],
    )

demo.launch()