"""Voice-enabled English tutor app: Whisper (STT) -> Qwen 2.5 (LLM) -> gTTS (TTS), served via Gradio."""

import os
import tempfile

import gradio as gr
import torch
from transformers import pipeline
import whisper
from gtts import gTTS

# --- 1. LOAD THE OPEN MODEL (Qwen) ---
print("⏳ Loading Qwen 2.5...")

# NOTE: device_map="auto" lets accelerate place the model (GPU if available),
# so no manual device selection is needed here.
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-7B-Instruct",
    model_kwargs={
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    },
    device_map="auto",
)

# Load Whisper (Ears) — English-only base model keeps transcription fast.
whisper_model = whisper.load_model("base.en")
print("✅ Model Loaded!")

# --- 2. TEACHER PERSONA ---
SYSTEM_PROMPT = """
You are a friendly, encouraging English tutor.
- Your goal: Help the user practice speaking English.
- Level: Intermediate (B1).
- Keep responses short (1-3 sentences).
- If the user makes a mistake, kindly correct it.
"""


# --- 3. PROCESSING FUNCTIONS ---
def text_to_speech(text):
    """Convert AI reply text to an mp3 file and return its path.

    Returns None for empty input or on any gTTS/network failure so the
    caller can simply skip audio playback.
    """
    try:
        if not text:
            return None
        tts = gTTS(text, lang='en')
        # Unique temp file avoids same-second filename collisions and
        # keeps generated audio out of the working directory.
        fd, filename = tempfile.mkstemp(prefix="response_", suffix=".mp3")
        os.close(fd)
        tts.save(filename)
        return filename
    except Exception as e:
        # Best-effort: a TTS failure should not break the chat loop.
        print(f"TTS Error: {e}")
        return None


def generate_response(message, history):
    """Use the Qwen pipeline to generate a tutor reply.

    Args:
        message: The latest user utterance (str).
        history: List of (user_msg, bot_msg) tuples from the Gradio chatbot.

    Returns:
        The assistant's reply text (str).
    """
    # Format messages for Qwen's chat template.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Only the last 3 exchanges are replayed to bound prompt length.
    for user_msg, bot_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    outputs = pipe(
        messages,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )
    # Pipeline returns the full conversation; the last entry is the new reply.
    return outputs[0]["generated_text"][-1]["content"]


def conversation_logic(audio_path, text_input, history):
    """Main loop: Listen -> Think -> Speak.

    Args:
        audio_path: Filepath of recorded microphone audio, or None.
        text_input: Typed fallback text, or "".
        history: Chatbot history as a list of (user, bot) tuples.

    Returns:
        (updated history, path to reply audio or None, "" to clear the textbox)
    """
    user_text = ""

    # 1. Get Input — microphone takes priority over typed text.
    if audio_path:
        result = whisper_model.transcribe(audio_path)
        user_text = result["text"]
    elif text_input:
        user_text = text_input
    else:
        return history, None, ""

    # Whisper can return whitespace-only output for silence; treat as no input.
    if not user_text.strip():
        return history, None, ""

    # 2. Get Intelligence
    ai_response = generate_response(user_text, history)

    # 3. Speak Output
    ai_audio = text_to_speech(ai_response)

    # 4. Update Chat (Standard tuple format)
    history.append((user_text, ai_response))
    return history, ai_audio, ""


# --- 4. BUILD INTERFACE ---
with gr.Blocks(title="Qwen English Tutor") as demo:
    gr.Markdown("# 🗣️ English Tutor (Powered by Qwen 2.5)")
    gr.Markdown("No API Keys required! Run completely open source.")

    # FIXED: Removed 'type="messages"' to support older Gradio versions
    chatbot = gr.Chatbot(label="Conversation")

    with gr.Row():
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
        text_in = gr.Textbox(label="⌨️ Type")

    submit_btn = gr.Button("Send", variant="primary")
    audio_out = gr.Audio(label="Teacher's Voice", autoplay=True)

    submit_btn.click(
        fn=conversation_logic,
        inputs=[audio_in, text_in, chatbot],
        outputs=[chatbot, audio_out, text_in],
    )

# Guard launch so importing this module (e.g. for tests) doesn't start a server.
if __name__ == "__main__":
    demo.launch()