"""Gradio demo for bilingual oral translation using Qwen3-0.6B + LoRA. This app provides a simple interface for Chinese ↔ English oral translation using a LoRA fine-tuned Qwen3-0.6B model. """ import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel def build_prompt(direction: str, text: str) -> str: """Build the instruction prompt for a given translation direction.""" if direction == "zh2en": inst = "请把下面中文翻译成口语自然的英文。只输出译文。" else: inst = "请把下面英文翻译成口语自然的中文。只输出译文。" return f"### Instruction:\n{inst}\n\n### Input:\n{text}\n\n### Response:\n" def load_model(): """Load the base model and LoRA adapter.""" base_model_name = "Qwen/Qwen3-0.6B" adapter_path = "Hzzzzx0/qwen3-0.6b-oral-lora" # You'll need to upload your model here print(f"Loading base model: {base_model_name}") tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( base_model_name, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True, ) print(f"Loading LoRA adapter: {adapter_path}") model = PeftModel.from_pretrained(model, adapter_path) model.eval() return model, tokenizer # Load model at startup print("Initializing model...") model, tokenizer = load_model() print("Model loaded successfully!") def translate(direction: str, text: str) -> str: """Translate text using the LoRA fine-tuned model.""" if not text.strip(): return "请输入要翻译的文本 / Please enter text to translate" prompt = build_prompt(direction, text) inputs = tokenizer(prompt, return_tensors="pt").to(model.device) with torch.no_grad(): output = model.generate( **inputs, max_new_tokens=128, do_sample=False, repetition_penalty=1.2, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id, ) result = tokenizer.decode(output[0], skip_special_tokens=True) # Extract only the response part if "### Response:" in result: return result.split("### Response:")[-1].strip() return result # Define 
# Example inputs shown beneath the interface.
examples = [
    ["zh2en", "你好呀"],
    ["zh2en", "今天天气真不错"],
    ["zh2en", "我们去吃饭吧"],
    ["en2zh", "See you later"],
    ["en2zh", "How are you doing?"],
    ["en2zh", "Let's grab some coffee"],
]

# Assemble the Gradio UI. The Blocks object is kept at module level under the
# conventional name `demo` (the usual entry point for Gradio deployments).
with gr.Blocks(title="口语化机器翻译 | Oral Translation", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🌐 口语化自动机器翻译
        ## Oral Machine Translation (Chinese ↔ English)

        基于 **Qwen3-0.6B + LoRA** 微调的中英双向口语翻译系统

        Built with Qwen3-0.6B fine-tuned using LoRA for natural, conversational translation.
        """
    )

    # Left column: direction selector, source text, and the trigger button.
    # Right column: the translated output.
    with gr.Row():
        with gr.Column():
            direction_selector = gr.Radio(
                choices=[
                    ("中文 → 英文 (Chinese to English)", "zh2en"),
                    ("英文 → 中文 (English to Chinese)", "en2zh"),
                ],
                value="zh2en",
                label="翻译方向 | Translation Direction",
            )
            source_box = gr.Textbox(
                lines=5,
                placeholder="输入要翻译的文本...\nEnter text to translate...",
                label="输入 | Input",
            )
            run_button = gr.Button("🔄 翻译 | Translate", variant="primary")
        with gr.Column():
            result_box = gr.Textbox(
                lines=5,
                label="翻译结果 | Translation",
            )

    # Clickable example rows; not pre-computed (cache_examples=False), so the
    # model runs only when a user selects one.
    gr.Examples(
        examples=examples,
        inputs=[direction_selector, source_box],
        outputs=result_box,
        fn=translate,
        cache_examples=False,
    )

    # Wire the button to the translation function.
    run_button.click(
        fn=translate,
        inputs=[direction_selector, source_box],
        outputs=result_box,
    )

    gr.Markdown(
        """
        ---
        ### 📊 模型信息 | Model Info
        - **基础模型 | Base Model**: Qwen3-0.6B
        - **微调方法 | Fine-tuning**: LoRA (rank=16, alpha=32)
        - **训练数据 | Training Data**: OpenSubtitles (5K samples)
        - **BLEU Score**: 11.89 (vs 1.24 baseline, +858% improvement)

        ### 🔗 相关链接 | Links
        - [GitHub Repository](https://github.com/yourusername/mt-qwen-oral)
        - [Model Card](https://huggingface.co/Hzzzzx0/qwen3-0.6b-oral-lora)
        """
    )


if __name__ == "__main__":
    demo.launch()