"""
BERT 인터랙티브 데모 (한글 버전)
================================
Devlin et al. (2019), "BERT: Pre-training of Deep Bidirectional Transformers
for Language Understanding" (arXiv:1810.04805) 논문의 태스크들을 재현합니다.

데모는 논문의 구성을 따라 여섯 개 탭으로 나뉘어 있습니다:

  사전학습 태스크 (논문 §3.1)
    1. Masked Language Model      -- 핵심 양방향 목적함수
    2. Next Sentence Prediction   -- IsNext / NotNext 이진 태스크

  파인튜닝 태스크 (논문 §4, Figure 4)
    3. 문장 쌍 분류    -- MNLI / RTE / MRPC 계열      (Figure 4a)
    4. 단일 문장 분류  -- SST-2 / CoLA 계열           (Figure 4b)
    5. 질의응답        -- SQuAD v1.1 계열             (Figure 4c)
    6. 개체명 인식     -- CoNLL-2003 계열             (Figure 4d)

로컬 실행:
    pip install -r requirements.txt
    python app.py
"""

from __future__ import annotations

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForNextSentencePrediction,
    pipeline,
)

# Device index handed to every pipeline below: 0 when CUDA is available,
# otherwise -1 (the NSP code path treats any negative value as "no GPU").
DEVICE = 0 if torch.cuda.is_available() else -1


# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------
# Models are lazy-loaded on first use to keep cold-start time down.
# Every built object is cached in a module-level dict.

# Cache of already-built inference objects, keyed by the name passed to
# get_pipeline().
_pipelines: dict[str, object] = {}


def get_pipeline(name: str):
    """Return the inference object registered under ``name``, building it once.

    The first call for a given name constructs the object and stores it in
    the module-level ``_pipelines`` cache; later calls return the cached
    object.

    Args:
        name: One of ``"mlm"``, ``"nsp"``, ``"mnli"``, ``"sst2"``,
            ``"squad"`` or ``"ner"``.

    Returns:
        A ``transformers`` pipeline for every name except ``"nsp"``, for
        which a ``(tokenizer, model)`` tuple is returned.

    Raises:
        ValueError: If ``name`` is not one of the names listed above.
    """
    if name in _pipelines:
        return _pipelines[name]

    if name == "mlm":
        # Paper section 3.1, task #1: predict the masked token from context.
        built = pipeline(
            "fill-mask",
            model="bert-base-uncased",
            device=DEVICE,
            top_k=5,
        )
    elif name == "nsp":
        # Paper section 3.1, task #2: the NSP head is loaded from the same
        # bert-base-uncased checkpoint; there is no pipeline wrapper here,
        # so the tokenizer and model are kept as a pair.
        checkpoint = "bert-base-uncased"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForNextSentencePrediction.from_pretrained(checkpoint)
        model.eval()
        if DEVICE >= 0:
            model = model.cuda()
        built = (tokenizer, model)
    elif name == "mnli":
        # Paper section 4.1: MNLI sentence-pair classification.
        built = pipeline(
            "text-classification",
            model="textattack/bert-base-uncased-MNLI",
            device=DEVICE,
        )
    elif name == "sst2":
        # Paper section 4.1: SST-2 binary sentiment classification.
        built = pipeline(
            "text-classification",
            model="textattack/bert-base-uncased-SST-2",
            device=DEVICE,
        )
    elif name == "squad":
        # Paper section 4.2: SQuAD v1.1 extractive question answering.
        built = pipeline(
            "question-answering",
            model="bert-large-uncased-whole-word-masking-finetuned-squad",
            device=DEVICE,
        )
    elif name == "ner":
        # Paper section 5.3: CoNLL-2003 NER, token-level tagging.
        built = pipeline(
            "token-classification",
            model="dslim/bert-base-NER",
            aggregation_strategy="simple",
            device=DEVICE,
        )
    else:
        raise ValueError(f"알 수 없는 파이프라인입니다: {name}")

    # Store only after a successful build so a failed load is retried later.
    _pipelines[name] = built
    return built


# ---------------------------------------------------------------------------
# 1. Masked Language Model (논문 §3.1, Task #1)
# ---------------------------------------------------------------------------

def run_mlm(text: str) -> str:
    """Fill in the ``[MASK]`` token of ``text`` and report the candidates.

    Args:
        text: Sentence expected to contain the literal ``[MASK]`` token.

    Returns:
        A usage hint when ``text`` is empty or contains no ``[MASK]``;
        otherwise a Markdown report with one numbered entry per prediction
        showing the token, its score and the completed sequence.
    """
    if not (text and "[MASK]" in text):
        return "❗ 문장에 `[MASK]` 토큰을 정확히 하나 포함시켜 주세요."

    candidates = get_pipeline("mlm")(text)

    # For inputs with several masks the result is a list of lists; only the
    # first inner list is reported so the output keeps a single shape.
    first = candidates[0]
    if isinstance(first, list):
        candidates = first

    report = [
        f"**입력 문장:** `{text}`",
        "",
        "**상위 5개 예측 (softmax 확률 기준):**",
        "",
    ]
    for rank, candidate in enumerate(candidates, start=1):
        report.append(
            f"{rank}. **{candidate['token_str']}**  — 확률 `{candidate['score']:.4f}`"
        )
        report.append(f"   → {candidate['sequence']}")
    return "\n".join(report)


# ---------------------------------------------------------------------------
# 2. Next Sentence Prediction (논문 §3.1, Task #2)
# ---------------------------------------------------------------------------

def run_nsp(sentence_a: str, sentence_b: str) -> str:
    """Score whether ``sentence_b`` follows ``sentence_a`` with the NSP head.

    Args:
        sentence_a: First sentence of the pair.
        sentence_b: Candidate follow-up sentence.

    Returns:
        A usage hint when either sentence is blank; otherwise a Markdown
        report with the verdict and both class probabilities.
    """
    if not (sentence_a.strip() and sentence_b.strip()):
        return "❗ 문장 A와 문장 B를 모두 입력해 주세요."

    tokenizer, model = get_pipeline("nsp")
    # Passing the two sentences as separate arguments encodes them as a pair.
    encoded = tokenizer(sentence_a, sentence_b, return_tensors="pt", truncation=True)
    if DEVICE >= 0:
        encoded = {key: tensor.cuda() for key, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**encoded).logits

    # Index 0 is read as IsNext and index 1 as NotNext.
    class_probs = torch.softmax(logits, dim=-1).squeeze().tolist()
    is_next_prob = class_probs[0]
    not_next_prob = class_probs[1]
    if is_next_prob > not_next_prob:
        verdict = "✅ IsNext (이어지는 문장)"
    else:
        verdict = "❌ NotNext (관련 없는 문장)"

    report = [
        f"**문장 A:** {sentence_a}",
        "",
        f"**문장 B:** {sentence_b}",
        "",
        f"**예측 결과:** {verdict}",
        "",
        f"- P(IsNext)  = `{is_next_prob:.4f}`",
        f"- P(NotNext) = `{not_next_prob:.4f}`",
    ]
    return "\n".join(report)


# ---------------------------------------------------------------------------
# 3. 문장 쌍 분류 (논문 §4.1, Figure 4a)
# ---------------------------------------------------------------------------

def run_mnli(premise: str, hypothesis: str) -> str:
    """Classify the premise/hypothesis relation with the MNLI checkpoint.

    Args:
        premise: The premise sentence.
        hypothesis: The hypothesis sentence to judge against the premise.

    Returns:
        A usage hint when either input is blank; otherwise a Markdown
        report with both inputs, the predicted label (with a Korean gloss)
        and its score.
    """
    if not premise.strip() or not hypothesis.strip():
        return "❗ 전제(premise)와 가설(hypothesis)을 모두 입력해 주세요."

    nlp = get_pipeline("mnli")
    # Send the inputs as a real text pair. Joining them into one string with
    # a literal " [SEP] " makes the tokenizer encode a single segment, so all
    # tokens share segment id 0; a pair puts the hypothesis in segment 1.
    # truncation=True keeps over-long inputs within the model's length limit.
    result = nlp({"text": premise, "text_pair": hypothesis}, truncation=True)
    # A single dict input may come back as a bare dict rather than a
    # one-element list, depending on the transformers version.
    if isinstance(result, list):
        result = result[0]
    label = result["label"]
    score = result["score"]

    # Show the label with a Korean gloss. Both named labels and generic
    # LABEL_n ids are covered; anything else is displayed unchanged.
    label_kor = {
        "entailment": "entailment (함의)",
        "neutral": "neutral (중립)",
        "contradiction": "contradiction (모순)",
        "LABEL_0": "contradiction (모순)",
        "LABEL_1": "entailment (함의)",
        "LABEL_2": "neutral (중립)",
    }.get(label, label)

    return (
        f"**전제 (Premise):**    {premise}\n\n"
        f"**가설 (Hypothesis):** {hypothesis}\n\n"
        f"**예측 결과:** `{label_kor}` (확신도 `{score:.4f}`)"
    )


# ---------------------------------------------------------------------------
# 4. 단일 문장 분류 (논문 §4.1, Figure 4b)
# ---------------------------------------------------------------------------

def run_sst2(text: str) -> str:
    """Run binary sentiment classification on ``text`` with the SST-2 model.

    Args:
        text: Sentence to classify.

    Returns:
        A usage hint when ``text`` is blank; otherwise a Markdown report
        with the input, the sentiment label and its score.
    """
    if not text.strip():
        return "❗ 문장을 입력해 주세요."

    top = get_pipeline("sst2")(text)[0]
    raw_label = top["label"]
    # Generic LABEL_n ids get a readable form; other labels pass through.
    readable = {
        "LABEL_0": "😞 부정 (Negative)",
        "LABEL_1": "😀 긍정 (Positive)",
    }
    sentiment = readable.get(raw_label, raw_label)

    report = [
        f"**입력 문장:** {text}",
        f"**감성:** {sentiment}",
        f"**확신도:** `{top['score']:.4f}`",
    ]
    return "\n\n".join(report)


# ---------------------------------------------------------------------------
# 5. 질의응답 (논문 §4.2, Figure 4c)
# ---------------------------------------------------------------------------

def run_squad(context: str, question: str) -> str:
    """Answer ``question`` by extracting a span from ``context``.

    Args:
        context: Passage the answer is taken from.
        question: Question to answer.

    Returns:
        A usage hint when either input is blank; otherwise a Markdown
        report with the question, the answer text, its character offsets
        and the score returned by the pipeline.
    """
    if not (context.strip() and question.strip()):
        return "❗ 지문(context)과 질문(question)을 모두 입력해 주세요."

    # The question-answering pipeline wraps the start/end span prediction
    # described in section 4.2 of the paper.
    qa = get_pipeline("squad")
    found = qa(question=question, context=context)

    answer = found["answer"]
    begin = found["start"]
    finish = found["end"]
    confidence = found["score"]
    return (
        f"**질문:** {question}\n\n"
        f"**답변:** **{answer}**\n\n"
        f"- 답변 위치: 문자 `{begin}`–`{finish}`\n"
        f"- 확신도 점수: `{confidence:.4f}`"
    )


# ---------------------------------------------------------------------------
# 6. 개체명 인식 (논문 §5.3, Figure 4d)
# ---------------------------------------------------------------------------

def run_ner(text: str) -> str:
    """Tag named entities in ``text`` and list them as Markdown bullets.

    Args:
        text: Sentence to analyse.

    Returns:
        A usage hint when ``text`` is blank, a "no entities" note when the
        pipeline returns nothing, and otherwise one bullet per entity with
        its text, type, score and character offsets.
    """
    if not text.strip():
        return "❗ 분석할 문장을 입력해 주세요."

    found = get_pipeline("ner")(text)
    header = f"**입력 문장:** {text}"
    if not found:
        return header + "\n\n_검출된 개체가 없습니다._"

    # Korean display names for the entity groups; unknown groups are shown
    # unchanged.
    group_names = {
        "PER": "인물 (PER)",
        "ORG": "조직 (ORG)",
        "LOC": "장소 (LOC)",
        "MISC": "기타 (MISC)",
    }

    bullets = [
        f"- **{item['word']}** → "
        f"`{group_names.get(item['entity_group'], item['entity_group'])}`  "
        f"(점수 `{item['score']:.3f}`, 문자 {item['start']}–{item['end']})"
        for item in found
    ]
    return "\n".join([header, "", "**검출된 개체:**", "", *bullets])


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

# Markdown rendered at the top of the page by build_ui(). It is user-facing
# text and is intentionally kept in Korean.
INTRO_MD = """
# 🤖 BERT 인터랙티브 데모

이 Space는 **Devlin et al. (2019), *BERT: Pre-training of Deep Bidirectional Transformers
for Language Understanding*** ([arXiv:1810.04805](https://arxiv.org/abs/1810.04805)) 논문의 실험들을
직접 체험해보는 페이지입니다.

처음 두 탭은 논문의 **사전학습 목적함수(§3.1)** 를 보여줍니다:
- **Masked LM** — 무작위로 15% 토큰을 가리고 양방향 문맥으로 예측
- **Next Sentence Prediction** — 문장 B가 문장 A 다음에 오는지 판단

나머지 네 탭은 Figure 4의 **파인튜닝 태스크 카테고리**를 다룹니다:
- (a) 문장 쌍 분류 — MNLI
- (b) 단일 문장 분류 — SST-2
- (c) 질의응답 — SQuAD v1.1
- (d) 단일 문장 태깅 — CoNLL-2003 NER

> 💡 각 탭을 처음 사용할 때 사전학습 체크포인트가 Hub에서 다운로드됩니다(약 400 MB – 1.3 GB).
> 그 이후 호출은 빠릅니다.
>
> 📝 사용된 모델은 모두 영어 BERT입니다. 한국어 BERT로 바꾸려면 `klue/bert-base` 등으로 모델 ID를 교체하세요.
"""


def build_ui() -> gr.Blocks:
    """Assemble the six-tab Gradio interface and return it without launching.

    Each tab creates its input textbox(es), a Markdown output, a button wired
    to the matching ``run_*`` function, and a set of clickable examples.
    Components are created in display order inside the nested context
    managers, so the statement order here is the page layout.

    Returns:
        The configured ``gr.Blocks`` app; the caller is responsible for
        calling ``launch()``.
    """
    with gr.Blocks(title="BERT 데모", theme=gr.themes.Soft()) as demo:
        gr.Markdown(INTRO_MD)

        with gr.Tabs():
            # ----- Tab 1: Masked LM -----
            with gr.Tab("1️⃣ Masked LM (사전학습)"):
                gr.Markdown(
                    "문장 어디에든 `[MASK]` 토큰을 넣어보세요. 모델이 **왼쪽과 오른쪽 문맥을 모두** "
                    "사용하여 그 자리에 올 가장 가능성 높은 단어들을 예측합니다."
                )
                mlm_in = gr.Textbox(
                    label="[MASK]가 포함된 문장",
                    value="The capital of France is [MASK].",
                    lines=2,
                )
                mlm_out = gr.Markdown()
                gr.Button("예측하기", variant="primary").click(run_mlm, mlm_in, mlm_out)
                gr.Examples(
                    examples=[
                        "The capital of France is [MASK].",
                        "I went to the bank to deposit my [MASK].",
                        "Albert Einstein was a famous [MASK].",
                        "She opened the door and [MASK] inside.",
                    ],
                    inputs=mlm_in,
                    label="예시 (클릭해서 사용)",
                )

            # ----- Tab 2: NSP -----
            with gr.Tab("2️⃣ Next Sentence Prediction (사전학습)"):
                gr.Markdown(
                    "두 문장이 주어졌을 때, 두 번째 문장이 첫 번째 문장 뒤에 실제로 이어지는 "
                    "문장인지(`IsNext`), 아니면 코퍼스에서 무작위로 뽑힌 관련 없는 문장인지"
                    "(`NotNext`)를 판단합니다."
                )
                with gr.Row():
                    nsp_a = gr.Textbox(label="문장 A", value="The man went to the store.", lines=2)
                    nsp_b = gr.Textbox(label="문장 B", value="He bought a gallon of milk.", lines=2)
                nsp_out = gr.Markdown()
                gr.Button("예측하기", variant="primary").click(run_nsp, [nsp_a, nsp_b], nsp_out)
                gr.Examples(
                    examples=[
                        ["The man went to the store.", "He bought a gallon of milk."],
                        ["The man went to the store.", "Penguins are flightless birds."],
                        ["She studied all night for the exam.", "She felt confident the next morning."],
                    ],
                    inputs=[nsp_a, nsp_b],
                    label="예시 (클릭해서 사용)",
                )

            # ----- Tab 3: sentence-pair classification (MNLI) -----
            with gr.Tab("3️⃣ 문장 쌍 분류 — MNLI"):
                gr.Markdown(
                    "**Multi-Genre Natural Language Inference (MNLI).** "
                    "**전제(premise)** 와 **가설(hypothesis)** 이 주어지면, 둘의 관계를 "
                    "**함의(entailment)**, **중립(neutral)**, **모순(contradiction)** 중 하나로 분류합니다. "
                    "논문 Figure 4(a)에 해당합니다."
                )
                with gr.Row():
                    mnli_p = gr.Textbox(
                        label="전제 (Premise)",
                        value="A man inspects the uniform of a figure in some East Asian country.",
                        lines=2,
                    )
                    mnli_h = gr.Textbox(label="가설 (Hypothesis)", value="The man is sleeping.", lines=2)
                mnli_out = gr.Markdown()
                gr.Button("분류하기", variant="primary").click(run_mnli, [mnli_p, mnli_h], mnli_out)
                gr.Examples(
                    examples=[
                        ["A soccer game with multiple males playing.", "Some men are playing a sport."],
                        ["A man is playing a guitar.", "A man is sleeping."],
                        ["The dog is running through the field.", "The animal is moving."],
                    ],
                    inputs=[mnli_p, mnli_h],
                    label="예시 (클릭해서 사용)",
                )

            # ----- Tab 4: single-sentence classification (SST-2) -----
            with gr.Tab("4️⃣ 단일 문장 분류 — SST-2"):
                gr.Markdown(
                    "**Stanford Sentiment Treebank (SST-2).** 영화 리뷰 문장의 감성을 "
                    "**긍정 / 부정** 이진 분류합니다. 논문 Figure 4(b)에 해당합니다."
                )
                sst_in = gr.Textbox(
                    label="문장",
                    value="This movie was absolutely fantastic — I loved every minute of it.",
                    lines=2,
                )
                sst_out = gr.Markdown()
                gr.Button("감성 분석", variant="primary").click(run_sst2, sst_in, sst_out)
                gr.Examples(
                    examples=[
                        "This movie was absolutely fantastic — I loved every minute of it.",
                        "What a complete waste of time and money.",
                        "The cinematography was breathtaking and the score was sublime.",
                        "I have never been so bored in my entire life.",
                    ],
                    inputs=sst_in,
                    label="예시 (클릭해서 사용)",
                )

            # ----- Tab 5: question answering (SQuAD) -----
            with gr.Tab("5️⃣ 질의응답 — SQuAD v1.1"):
                gr.Markdown(
                    "**Stanford Question Answering Dataset (SQuAD v1.1).** 지문과 질문이 주어지면, "
                    "모델이 지문 안에서 답이 시작되는 위치와 끝나는 위치, 즉 **답변 span** 을 예측합니다. "
                    "논문 Figure 4(c)에 해당합니다."
                )
                squad_ctx = gr.Textbox(
                    label="지문 (Context)",
                    value=(
                        "BERT was introduced by researchers at Google AI Language in "
                        "October 2018. It stands for Bidirectional Encoder Representations "
                        "from Transformers and is pre-trained on the BooksCorpus (800M "
                        "words) and English Wikipedia (2,500M words). BERT-Large has 340 "
                        "million parameters."
                    ),
                    lines=6,
                )
                squad_q = gr.Textbox(label="질문 (Question)", value="How many parameters does BERT-Large have?")
                squad_out = gr.Markdown()
                # Input order (context, question) matches run_squad's parameters.
                gr.Button("답변 찾기", variant="primary").click(run_squad, [squad_ctx, squad_q], squad_out)
                gr.Examples(
                    examples=[
                        [
                            "BERT was introduced by researchers at Google AI Language in October 2018. It stands for Bidirectional Encoder Representations from Transformers and is pre-trained on the BooksCorpus (800M words) and English Wikipedia (2,500M words). BERT-Large has 340 million parameters.",
                            "When was BERT introduced?",
                        ],
                        [
                            "BERT was introduced by researchers at Google AI Language in October 2018. It stands for Bidirectional Encoder Representations from Transformers and is pre-trained on the BooksCorpus (800M words) and English Wikipedia (2,500M words). BERT-Large has 340 million parameters.",
                            "What does BERT stand for?",
                        ],
                    ],
                    inputs=[squad_ctx, squad_q],
                    label="예시 (클릭해서 사용)",
                )

            # ----- Tab 6: NER -----
            with gr.Tab("6️⃣ 개체명 인식 — CoNLL-2003"):
                gr.Markdown(
                    "**CoNLL-2003 NER.** 각 토큰을 인물(PER), 조직(ORG), 장소(LOC), "
                    "기타(MISC) 카테고리로 분류하는 토큰 단위 태깅 태스크입니다. "
                    "논문 Figure 4(d)에 해당합니다."
                )
                ner_in = gr.Textbox(
                    label="문장",
                    value="Jacob Devlin works at Google in Mountain View, California.",
                    lines=2,
                )
                ner_out = gr.Markdown()
                gr.Button("개체 인식", variant="primary").click(run_ner, ner_in, ner_out)
                gr.Examples(
                    examples=[
                        "Jacob Devlin works at Google in Mountain View, California.",
                        "Apple CEO Tim Cook announced the new iPhone in Cupertino.",
                        "Angela Merkel met Emmanuel Macron in Berlin last Tuesday.",
                    ],
                    inputs=ner_in,
                    label="예시 (클릭해서 사용)",
                )

        # Footer with links to the paper, the original code and the base model.
        gr.Markdown(
            "---\n"
            "📄 논문: [arXiv:1810.04805](https://arxiv.org/abs/1810.04805)  •  "
            "💻 원 저자 코드: [google-research/bert](https://github.com/google-research/bert)  •  "
            "🤗 모델: [bert-base-uncased](https://huggingface.co/bert-base-uncased)"
        )

    return demo


if __name__ == "__main__":
    # Script entry point: build the interface, then start the Gradio server
    # with its default launch settings.
    app = build_ui()
    app.launch()