# app.py — "llama2" Hugging Face Space by murat4595 (commit b6154d8, verified)
import gradio as gr
import torch
from transformers import pipeline
import whisper
from gtts import gTTS
import os
import time
# --- 1. LOAD THE OPEN MODEL (Qwen) ---
print("⏳ Loading Qwen 2.5...")
# Setup device (GPU or CPU)
# NOTE(review): `device` is computed but never used below — the pipeline
# relies on device_map="auto" instead; confirm before removing.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-7B-Instruct",
    model_kwargs={
        # fp16 halves weight memory; low_cpu_mem_usage reduces peak RAM at load.
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    },
    # Let the loader place model layers across available devices automatically.
    device_map="auto",
)
# Load Whisper (Ears) — English-only "base" model used for speech-to-text.
whisper_model = whisper.load_model("base.en")
print("✅ Model Loaded!")
# --- 2. TEACHER PERSONA ---
# Injected as the first ("system") chat message in generate_response().
SYSTEM_PROMPT = """
You are a friendly, encouraging English tutor.
- Your goal: Help the user practice speaking English.
- Level: Intermediate (B1).
- Keep responses short (1-3 sentences).
- If the user makes a mistake, kindly correct it.
"""
# --- 3. PROCESSING FUNCTIONS ---
def text_to_speech(text):
    """Convert the tutor's reply text to an MP3 file via gTTS.

    Parameters
    ----------
    text : str | None
        Text to synthesize. Empty/None input yields no audio.

    Returns
    -------
    str | None
        Path of the generated MP3 (timestamped to avoid overwriting a
        clip that may still be playing), or None when input is empty or
        synthesis fails.
    """
    try:
        if not text:
            return None
        # BUG FIX: the cleaned text was computed but never used — gTTS was
        # called with the raw `text`. Strip the word "failed" so it is not
        # read aloud, and actually pass the cleaned string to gTTS.
        clean_text = text.replace("failed", "")
        if not clean_text.strip():
            # Nothing left to say after cleaning; gTTS rejects empty input.
            return None
        tts = gTTS(clean_text, lang='en')
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        # Best-effort: a TTS failure must not crash the chat turn.
        print(f"TTS Error: {e}")
        return None
def generate_response(message, history):
    """Generate the tutor's next reply with the Qwen chat pipeline.

    Builds a chat-formatted prompt from the system persona, the most
    recent turns of *history* (list of (user, assistant) tuples), and
    the new *message*, then samples a completion.
    """
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Only the three most recent exchanges are kept to bound prompt length.
    for past_user, past_bot in history[-3:]:
        chat += [
            {"role": "user", "content": past_user},
            {"role": "assistant", "content": past_bot},
        ]
    chat.append({"role": "user", "content": message})
    result = pipe(
        chat,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )
    # The pipeline returns the whole conversation; the final entry is the reply.
    return result[0]["generated_text"][-1]["content"]
def conversation_logic(audio_path, text_input, history):
    """Run one full chat turn: transcribe/read input, reply, synthesize speech.

    Returns the updated history (mutated in place, as Gradio expects),
    the path to the spoken reply (or None), and an empty string that
    clears the textbox component.
    """
    # 1. Prefer the microphone recording; fall back to typed text.
    if audio_path:
        user_text = whisper_model.transcribe(audio_path)["text"]
    elif text_input:
        user_text = text_input
    else:
        # Nothing was provided this turn — leave the chat unchanged.
        return history, None, ""

    if not user_text.strip():
        # Whitespace-only input is treated the same as no input.
        return history, None, ""

    # 2. Think, then 3. speak.
    reply = generate_response(user_text, history)
    reply_audio = text_to_speech(reply)

    # 4. Chatbot component expects (user, assistant) tuples.
    history.append((user_text, reply))
    return history, reply_audio, ""
# --- 4. BUILD INTERFACE ---
with gr.Blocks(title="Qwen English Tutor") as demo:
    gr.Markdown("# 🗣️ English Tutor (Powered by Qwen 2.5)")
    gr.Markdown("No API Keys required! Run completely open source.")
    # FIXED: Removed 'type="messages"' to support older Gradio versions
    chatbot = gr.Chatbot(label="Conversation")
    with gr.Row():
        # Either input works; conversation_logic prefers audio over text.
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        text_in = gr.Textbox(label="⌨️ Type")
    # NOTE(review): original indentation was lost — the button may have sat
    # inside the Row; confirm the intended layout.
    submit_btn = gr.Button("Send", variant="primary")
    # autoplay speaks the tutor's reply as soon as it is generated.
    audio_out = gr.Audio(label="Teacher's Voice", autoplay=True)
    # Third output ("") clears the textbox after each turn.
    submit_btn.click(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in]
    )
demo.launch()