"""Russian <-> Vietnamese dictionary app.

Translates single words with Helsinki-NLP MarianMT models and, for
Russian lookups, enriches the result with definitions and usage
examples scraped from ru.wiktionary.org. Served as a Gradio web UI.
"""

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# =========================
# MODELS
# =========================

# RU -> VI
ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
ruvi_tokenizer = AutoTokenizer.from_pretrained(ruvi_model_name)
ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(ruvi_model_name)

# VI -> RU
viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"
viru_tokenizer = AutoTokenizer.from_pretrained(viru_model_name)
viru_model = AutoModelForSeq2SeqLM.from_pretrained(viru_model_name)


# =========================
# WIKTIONARY SCRAPER
# =========================

def get_russian_info(word):
    """Scrape ru.wiktionary.org for definitions and examples of *word*.

    Returns a ``(definition_text, example_text)`` tuple of
    newline-joined strings. Never raises: on any failure it returns
    Vietnamese fallback/error messages instead, so the UI stays usable.
    """
    try:
        url = f"https://ru.wiktionary.org/wiki/{word}"
        # A browser-like UA; Wikimedia sites may reject the default one.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")

        # --- Definitions: the first ordered lists of the entry. ---
        definitions = []
        for ol in soup.find_all("ol")[:2]:
            for item in ol.find_all("li")[:5]:
                text = item.get_text(" ", strip=True)
                if len(text) > 20:  # skip stubs / navigation items
                    definitions.append(text)

        # --- Examples: dedicated example spans first. ---
        examples = []
        for quote in soup.find_all("span", class_="example-block")[:5]:
            txt = quote.get_text(" ", strip=True)
            if txt:
                examples.append(txt)

        # Fallback: italicized text containing the word often holds usage.
        if not examples:
            for italic in soup.find_all("i")[:10]:
                txt = italic.get_text(" ", strip=True)
                if len(txt) > 15 and word.lower() in txt.lower():
                    examples.append(txt)

        definition_text = (
            "\n\n".join(definitions[:5])
            if definitions
            else "Không tìm thấy định nghĩa"
        )
        example_text = (
            "\n\n".join(examples[:5]) if examples else "Không có ví dụ"
        )
        return definition_text, example_text
    except Exception as e:
        # Best-effort boundary: surface the error in the UI, never crash.
        return f"Lỗi định nghĩa: {str(e)}", "Không có ví dụ"


# =========================
# TRANSLATION FUNCTIONS
# =========================

def _translate(tokenizer, model, text):
    """Run one seq2seq translation of *text* and return the decoded string."""
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def ru_to_vi(word):
    """Translate a Russian word to Vietnamese and fetch dictionary info.

    Returns ``(translation, definition_text, example_text)`` — the three
    values wired to the RU->VI tab's output textboxes.
    """
    translated = _translate(ruvi_tokenizer, ruvi_model, word)
    definition, example = get_russian_info(word)
    return translated, definition, example


def vi_to_ru(word):
    """Translate a Vietnamese word to Russian; returns the translation."""
    return _translate(viru_tokenizer, viru_model, word)


# =========================
# UI
# =========================

with gr.Blocks() as demo:
    gr.Markdown("# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary")

    with gr.Tab("🇷🇺 Nga → Việt"):
        ru_input = gr.Textbox(placeholder="Nhập tiếng Nga...")
        vi_output = gr.Textbox(label="🇻🇳 Nghĩa tiếng Việt")
        definition_output = gr.Textbox(label="📖 Định nghĩa")
        example_output = gr.Textbox(label="💬 Ví dụ")
        ru_btn = gr.Button("Tra cứu")
        ru_btn.click(
            ru_to_vi,
            inputs=ru_input,
            outputs=[vi_output, definition_output, example_output],
        )

    with gr.Tab("🇻🇳 Việt → Nga"):
        vi_input = gr.Textbox(placeholder="Nhập tiếng Việt...")
        ru_output = gr.Textbox(label="🇷🇺 Nghĩa tiếng Nga")
        vi_btn = gr.Button("Dịch")
        vi_btn.click(vi_to_ru, inputs=vi_input, outputs=ru_output)

# Bind to all interfaces so the app is reachable from outside a container.
demo.launch(server_name="0.0.0.0")