import torch
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# ==========================================
# 1. CONFIGURATION & MODEL LOADING (BACKEND)
# ==========================================
# Torch device string: GPU when CUDA is available, CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- Load PhoWhisper (speech recognition) ---
print("Đang tải model PhoWhisper...")
try:
    # The pipeline takes a device index rather than a name: 0 = first GPU, -1 = CPU.
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="vinai/PhoWhisper-small",
        device=-1 if DEVICE == "cpu" else 0,
    )
except Exception as load_error:
    # Best effort: keep the module importable and leave a None marker so that
    # callers can report the missing model instead of crashing at import time.
    print(f"Lỗi load PhoWhisper: {load_error}")
    asr_pipeline = None

# --- Load the ViT5 translation model ---
# Location handed to from_pretrained(); "." is resolved against the current
# working directory.
TRANSLATION_MODEL_PATH = "."

print("Đang tải model dịch...")
# Bind both names up front so they always exist, even when loading fails
# before either assignment inside the try block has run.
trans_tokenizer = None
trans_model = None
try:
    trans_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_PATH)
    trans_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL_PATH).to(DEVICE)
    print("Load model dịch thành công!")
except Exception as e:
    print(f"Lỗi load model dịch: {e}")
    # Reset the pair together: a tokenizer without its model (or the reverse)
    # is unusable, and a single None check must be enough for callers.
    trans_tokenizer = None
    trans_model = None

# ==========================================
# 2. PROCESSING LOGIC
# ==========================================
def speech_to_text(audio_path):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_path: Path of the audio file to transcribe, or None when no
            audio was supplied.

    Returns:
        The value stored under the "text" key of the pipeline result.
        Returns "" when ``audio_path`` is None. When the pipeline was not
        loaded, or when recognition raises, a user-facing error message is
        returned instead of raising.
    """
    if audio_path is None:
        return ""
    if asr_pipeline is None:
        return "Lỗi: Chưa load được PhoWhisper."

    try:
        # The key lookup stays inside the try so that an unexpected result
        # shape is reported the same way as an inference failure.
        return asr_pipeline(audio_path)["text"]
    except Exception as err:
        return f"Lỗi nhận dạng: {err}"

def text_to_gloss(vietnamese_text):
    """Translate Vietnamese text into VSL gloss with the seq2seq model.

    The text is prefixed with "vi: ", tokenized (truncated to 128 tokens),
    decoded with 5-beam search, and a leading "vsl: " marker is removed from
    the decoded output.

    Args:
        vietnamese_text: The sentence to translate. None, empty and
            whitespace-only values are treated as "nothing to translate".

    Returns:
        The gloss string; "" when there is nothing to translate; a
        user-facing error message when the translation model is not loaded.
    """
    # Skip the model entirely for blank input instead of decoding noise.
    if not vietnamese_text or not vietnamese_text.strip():
        return ""
    if trans_model is None:
        return "Lỗi: Chưa load được model dịch."

    input_text = f"vi: {vietnamese_text}"
    inputs = trans_tokenizer(
        input_text,
        return_tensors="pt",
        max_length=128,
        truncation=True,
    ).to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = trans_model.generate(
            inputs["input_ids"],
            max_length=128,
            num_beams=5,
            early_stopping=True,
        )

    gloss_text = trans_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip only the leading marker. str.replace() would also delete any later
    # "vsl: " occurrence inside the gloss itself.
    prefix = "vsl: "
    if gloss_text.startswith(prefix):
        return gloss_text[len(prefix):]
    return gloss_text

def full_pipeline(audio, text_input, mode):
    """Run the input -> Vietnamese text -> VSL gloss flow for one request.

    Args:
        audio: Audio file path from the audio widget, or None.
        text_input: Text typed by the user (may be None or empty).
        mode: Mode label selecting the input source; either the audio label
            or the text label defined below. Any other value is handled like
            the text mode with missing input.

    Returns:
        A ``(vietnamese_text, gloss)`` tuple. When the required input is
        missing, the first element is a warning message and the gloss is "".
        When speech recognition reports an error, the first element is that
        error message and the gloss is "".
    """
    audio_mode = "Giọng nói (Microphone/File)"
    text_mode = "Văn bản (Nhập tay)"
    # Prefixes of the error messages produced by speech_to_text.
    asr_error_prefixes = ("Lỗi: ", "Lỗi nhận dạng: ")

    if mode == audio_mode and audio is not None:
        vietnamese_output = speech_to_text(audio)
        # An ASR error message is not a transcript: show it, but do not feed
        # it to the translator.
        if isinstance(vietnamese_output, str) and vietnamese_output.startswith(asr_error_prefixes):
            return vietnamese_output, ""
    elif mode == text_mode and text_input and text_input.strip():
        # Whitespace-only text counts as missing input.
        vietnamese_output = text_input
    elif mode == audio_mode:
        return "⚠️ Vui lòng thu âm hoặc tải file.", ""
    else:
        return "⚠️ Vui lòng nhập văn bản.", ""

    return vietnamese_output, text_to_gloss(vietnamese_output)

# ==========================================
# 3. COFFEE THEME INTERFACE (UI/UX)
# ==========================================

# Coffee-toned CSS handed to gr.Blocks(css=...). It styles the classes used by
# the layout below: the HTML header (header-container, uni-name, project-name,
# author-name), the buttons (primary-btn), the section labels (label-text),
# the gloss output box (gloss-box) and the footer.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,700;1,400&family=Roboto:wght@300;400;500&display=swap');

body { 
    font-family: 'Roboto', sans-serif; 
    background-color: #fdfbf7; 
}

/* --- HEADER --- */
.header-container {
    background: linear-gradient(135deg, #3e2723 0%, #5d4037 100%);
    padding: 30px;
    border-radius: 15px;
    text-align: center;
    box-shadow: 0 6px 12px rgba(62, 39, 35, 0.3);
    margin-bottom: 25px;
    border-bottom: 4px solid #8d6e63;
}

/* Ép màu trắng bằng !important */
.uni-name { 
    font-family: 'Roboto', sans-serif;
    font-size: 16px; 
    text-transform: uppercase; 
    letter-spacing: 2px; 
    color: #ffffff !important;  /* Màu trắng */
    font-weight: bold;
    opacity: 0.9;
}

.project-name { 
    font-family: 'Lora', serif; 
    font-size: 32px; 
    font-weight: 700; 
    margin: 15px 0; 
    text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    color: #ffffff !important; /* Màu trắng */
}

.author-name { 
    font-size: 14px; 
    font-style: italic; 
    color: #ffffff !important; /* Màu trắng */
    border-top: 1px solid rgba(255,255,255,0.3);
    display: inline-block;
    padding-top: 10px;
    margin-top: 5px;
}

/* --- BUTTONS --- */
button.primary-btn {
    background-color: #6d4c41 !important;
    color: white !important;
    font-weight: 600;
    border-radius: 8px;
    border: none;
    transition: all 0.3s;
}

button.primary-btn:hover {
    background-color: #5d4037 !important;
    transform: translateY(-2px);
    box-shadow: 0 6px 10px rgba(109, 76, 65, 0.3);
}

/* --- OTHER --- */
.label-text {
    font-weight: bold;
    color: #4e342e;
    margin-bottom: 5px;
    font-size: 1.1em;
}

.gloss-box textarea {
    font-family: 'Roboto', sans-serif;
    font-size: 22px !important;
    font-weight: bold;
    color: #bf360c !important; 
    background-color: #fff3e0 !important; 
    border: 1px solid #ffccbc !important;
}

.footer { 
    text-align: center; 
    color: #8d6e63; 
    font-size: 12px; 
    margin-top: 40px; 
    border-top: 1px solid #d7ccc8;
    padding-top: 20px;
}
"""

# Build the theme in two steps: start from Gradio's Soft preset with the
# chosen hues, then override individual style variables with coffee-brown
# values.
coffee_theme = gr.themes.Soft(
    neutral_hue="gray",
    secondary_hue="yellow",
    primary_hue="orange",
)
coffee_theme = coffee_theme.set(
    # Page and block surfaces
    body_background_fill="#fcf9f2",
    block_background_fill="#ffffff",
    block_border_width="1px",
    block_shadow="0 2px 4px rgba(0,0,0,0.05)",
    # Primary button colours
    button_primary_background_fill="#6d4c41",
    button_primary_background_fill_hover="#5d4037",
    button_primary_text_color="white",
    # Slider accent
    slider_color="#8d6e63",
)

# Build the UI: header banner, a two-column row (inputs on the left, results
# on the right), a footer, and the click handlers that call full_pipeline.
with gr.Blocks(css=custom_css, theme=coffee_theme, title="VSL Coffee Translator") as demo:
    
    # --- HEADER ---
    gr.HTML(
        """
        <div class="header-container">
            <div class="uni-name">Trường Đại học Kinh tế - Đại học Đà Nẵng</div>
            <div class="project-name">☕ HỆ THỐNG DỊCH VSL - ANGEL COFFEE ☕</div>
            <div class="author-name">Sinh viên: Lê Thị Hà Vy | GVHD: Th.S Nguyễn Văn Chức</div>
        </div>
        """
    )

    # --- MAIN CONTENT ---
    with gr.Row():
        
        # LEFT COLUMN: INPUT
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("### 🎙️ Nhập yêu cầu gọi món", elem_classes="label-text")
            
            with gr.Tabs():
                # Audio tab
                with gr.TabItem("Ghi âm / Tải file"):
                    # type="filepath" hands full_pipeline a file path, which is
                    # what speech_to_text passes on to the ASR pipeline.
                    input_audio = gr.Audio(
                        sources=["microphone", "upload"], 
                        type="filepath",
                        label="Nói câu gọi món..."
                    )
                    # Fixed mode label sent to full_pipeline by the audio button.
                    mode_audio = gr.State(value="Giọng nói (Microphone/File)")
                    btn_audio = gr.Button("☕ Xử lý Giọng nói", variant="primary", elem_classes="primary-btn")

                # Text tab
                with gr.TabItem("Nhập văn bản"):
                    input_text = gr.Textbox(
                        label="Nhập câu tiếng Việt",
                        placeholder="Ví dụ: Cho tôi một ly cà phê sữa ít đường...",
                        lines=4
                    )
                    # Fixed mode label sent to full_pipeline by the text button.
                    mode_text = gr.State(value="Văn bản (Nhập tay)")
                    
                    # Sample sentences that fill the text box
                    gr.Examples(
                        examples=[
                            ["Tôi gọi một ly cà phê muối"],
                            ["Lấy cho mình một bạc xỉu ít ngọt"],
                            ["Tôi muốn thanh toán tiền"],
                            ["Cảm ơn bạn rất nhiều"]
                        ],
                        inputs=input_text,
                        label="Gợi ý gọi món"
                    )
                    
                    btn_text = gr.Button("☕ Dịch Văn bản", variant="primary", elem_classes="primary-btn")

        # RIGHT COLUMN: OUTPUT
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("### ✨ Kết quả dịch (VSL Gloss)", elem_classes="label-text")
            
            # Intermediate result (Vietnamese text)
            gr.Label("Bước 1: Nhận dạng Tiếng Việt", show_label=False, color="orange")
            output_vi = gr.Textbox(
                label="Văn bản Tiếng Việt", 
                interactive=False, 
                show_copy_button=True,
                lines=2
            )
            
            gr.HTML("<br>") 

            # Final result (VSL gloss); the gloss-box class applies the
            # ".gloss-box textarea" rule from custom_css.
            gr.Label("Bước 2: Cú pháp Ký hiệu (VSL)", show_label=False, color="yellow")
            output_gloss = gr.Textbox(
                label="VSL Gloss Output", 
                interactive=False, 
                show_copy_button=True,
                elem_classes="gloss-box", 
                lines=3
            )

    # --- FOOTER ---
    gr.HTML(
        """
        <div class="footer">
        Dự án hỗ trợ giao tiếp cho người khiếm thính tại Angel Coffee Đà Nẵng.<br>
        Powered by <b>PhoWhisper</b> & <b>ViT5</b> (Fine-tuned on Coffee Dataset).
        </div>
        """
    )

    # --- EVENT HANDLING ---
    # Both buttons call full_pipeline with both inputs; only the mode state
    # differs, and it selects which input the function uses.
    btn_audio.click(
        fn=full_pipeline, 
        inputs=[input_audio, input_text, mode_audio], 
        outputs=[output_vi, output_gloss]
    )
    
    btn_text.click(
        fn=full_pipeline, 
        inputs=[input_audio, input_text, mode_text], 
        outputs=[output_vi, output_gloss]
    )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()