# llama / app.py — Hugging Face Space by murat4595 ("Create app.py", commit 0d79937, verified)
import gradio as gr
import torch
from transformers import pipeline
import whisper
from gtts import gTTS
import os
import time
# --- 1. CONFIGURATION ---
# Using the Ultra-Fast 0.5B model (Switch to "1.5B" if you want it smarter)
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
print(f"⏳ Loading {MODEL_ID}...")
# Chat LLM pinned to CPU; low_cpu_mem_usage reduces peak RAM while loading weights.
pipe = pipeline(
"text-generation",
model=MODEL_ID,
model_kwargs={"low_cpu_mem_usage": True},
device_map="cpu",
)
# Speech-to-text: smallest English-only Whisper checkpoint, chosen for speed.
whisper_model = whisper.load_model("tiny.en")
print("✅ Systems Ready.")
# --- 2. TEACHER PERSONA (UPDATED FOR GRAMMAR) ---
# System prompt injected as the first chat message in generate_response().
# NOTE: this text is sent verbatim to the model — do not reformat it.
SYSTEM_PROMPT = """
You are a helpful English teacher.
1. If the user makes a grammar mistake, ALWAYS correct it first. Start with "Correction: [Correct Sentence]".
2. Then, answer the user's question or continue the chat.
3. Keep your own English simple (A2 level).
4. Keep responses short (max 2 sentences).
"""
# --- 3. PROCESSING FUNCTIONS ---
def text_to_speech(text):
    """Synthesize *text* to an MP3 file with gTTS.

    Returns the generated filename, or None when *text* is empty or
    synthesis fails (gTTS needs network access to Google's TTS API).
    """
    # Guard clause outside the try: empty input is expected, not an error.
    if not text:
        return None
    try:
        # Strip markdown markers so the voice does not read them aloud.
        text_clean = text.replace("*", "").replace("#", "")
        tts = gTTS(text_clean, lang='en')
        # Timestamped name avoids clobbering the previous response file.
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        return filename
    except Exception:
        # Best-effort: the chat must keep working even if audio synthesis
        # fails.  Narrowed from a bare `except:` so Ctrl-C / SystemExit
        # still propagate.
        return None
def generate_response(message, history):
    """Build a chat transcript and ask the LLM for the tutor's next reply."""
    convo = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Replay only the last two exchanges — keeps the prompt short for speed.
    for past_user, past_bot in history[-2:]:
        convo.extend((
            {"role": "user", "content": past_user},
            {"role": "assistant", "content": past_bot},
        ))
    convo.append({"role": "user", "content": message})
    result = pipe(
        convo,
        max_new_tokens=60,   # Slightly increased to allow room for corrections
        do_sample=True,
        temperature=0.6,     # Lower temperature = more accurate grammar
    )
    # The pipeline returns the whole conversation; the final entry is the
    # newly generated assistant turn.
    return result[0]["generated_text"][-1]["content"]
def conversation_logic(audio_path, text_input, history):
    """Run one full chat turn: capture input, generate a reply, voice it.

    Returns (updated chat history, reply audio path or None, "" to clear
    the textbox).
    """
    # Voice input takes priority over typed text when both are supplied.
    if audio_path:
        user_text = whisper_model.transcribe(audio_path)["text"]
    elif text_input:
        user_text = text_input
    else:
        # Nothing to process — leave the conversation untouched.
        return history, None, ""

    # Whisper may emit pure whitespace on silence; treat that as no input.
    if not user_text.strip():
        return history, None, ""

    ai_response = generate_response(user_text, history)
    ai_audio = text_to_speech(ai_response)
    history.append((user_text, ai_response))
    return history, ai_audio, ""
# --- 4. UI SETUP ---
with gr.Blocks(title="Grammar Tutor") as app:
    gr.Markdown("# ⚡ Grammar & Speaking Tutor")
    gr.Markdown("I will correct your mistakes and chat with you.")

    # Conversation transcript (list of (user, bot) tuples).
    chat_view = gr.Chatbot(label="Conversation")

    # Input row: microphone capture and a plain textbox side by side.
    with gr.Row():
        mic_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        typed_input = gr.Textbox(label="⌨️ Type")

    send_button = gr.Button("Send", variant="primary")
    # Autoplays the synthesized reply as soon as it is ready.
    voice_output = gr.Audio(label="Teacher's Voice", autoplay=True)

    # Wire the button: conversation_logic also returns "" to clear the textbox.
    send_button.click(
        fn=conversation_logic,
        inputs=[mic_input, typed_input, chat_view],
        outputs=[chat_view, voice_output, typed_input],
    )

app.launch()