"""
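MCCE Demo — Gradio app for HuggingFace Spaces.

Proxies an OpenAI-compatible chat-completions endpoint served from a VPS GPU
(Gemma 4 31B) and shows MCCE's live inference results in a web UI.

Secrets (set in the Spaces admin UI):
    MCCE_GPU_INFERENCE_ENDPOINT  required (e.g. http://<host>:<port>/v1/chat/completions)
    MCCE_GPU_MODEL               optional (default: unsloth/gemma-4-31B-it)
    MCCE_GPU_API_KEY             optional; sent as a Bearer token when set
    MCCE_GPU_TIMEOUT             optional; request timeout in seconds (default: 60)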
"""

from __future__ import annotations

import os

import gradio as gr
import httpx

# ---------------------------------------------------------------------------
# 설정 — Spaces Secrets에서만 주입 (하드코딩 금지)
# ---------------------------------------------------------------------------

# Chat-completions URL of the GPU server; empty means "not configured".
_ENDPOINT = os.environ.get("MCCE_GPU_INFERENCE_ENDPOINT", "").strip()

# Model id sent in the request payload. A secret that is present but blank
# falls back to the default instead of sending an empty model name.
_MODEL = os.environ.get("MCCE_GPU_MODEL", "").strip() or "unsloth/gemma-4-31B-it"

# Optional API key; when non-empty it is sent as a Bearer token.
_API_KEY = os.environ.get("MCCE_GPU_API_KEY", "").strip()

# Request timeout in seconds. A blank, non-numeric, non-positive, nan or inf
# value falls back to 60 s rather than crashing the app at import time.
try:
    _TIMEOUT_SEC = float(os.environ.get("MCCE_GPU_TIMEOUT", "").strip() or "60")
except ValueError:
    _TIMEOUT_SEC = 60.0
if not 0 < _TIMEOUT_SEC < float("inf"):
    _TIMEOUT_SEC = 60.0

# System prompt prepended to every conversation sent to the model.
_SYSTEM_PROMPT = (
    "너는 마머스(MaMurS) AI다. 가난한 사람을 돕기 위해 만들어진 "
    "한국형 소버린 AI이며, 정직하고 따뜻하게 대답한다."
)


# ---------------------------------------------------------------------------
# 추론 호출
# ---------------------------------------------------------------------------


def _build_messages(message: str, history: list) -> list[dict]:
    """Convert Gradio chat history plus the new message to OpenAI chat messages.

    Two history layouts are accepted:

    * ``{"role": ..., "content": ...}`` dicts, forwarded with only those two
      keys and without modifying the values;
    * two-element ``[user, assistant]`` lists/tuples, where each non-empty side
      becomes its own message with the text coerced to ``str``.

    Entries of any other shape are ignored. The result always starts with the
    system prompt and ends with ``message`` as the final user turn.
    """
    chat: list[dict] = [{"role": "system", "content": _SYSTEM_PROMPT}]

    for entry in history or ():
        if isinstance(entry, dict):
            # Role/content dict: keep only the two keys the API expects.
            if "role" in entry and "content" in entry:
                chat.append({"role": entry["role"], "content": entry["content"]})
            continue

        if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
            continue

        # Pair layout: first element is the user turn, second the reply.
        for role, text in zip(("user", "assistant"), entry):
            if text:
                chat.append({"role": role, "content": str(text)})

    chat.append({"role": "user", "content": message})
    return chat


def _call_gpu(message: str, history: list) -> str:
    """Send the conversation to the GPU endpoint and return the reply text.

    Builds a non-streaming chat-completions payload (temperature 0.7,
    ``max_tokens`` 512) from ``message`` and ``history`` and POSTs it to
    ``_ENDPOINT``. An ``Authorization: Bearer`` header is added only when
    ``_API_KEY`` is non-empty.

    Args:
        message: The new user message.
        history: Prior turns, in any layout ``_build_messages`` accepts.

    Returns:
        The ``content`` string of the first choice in the response.

    Raises:
        gr.Error: If the endpoint is not configured, the request times out,
            the server returns an HTTP error status, a transport error
            occurs, the body is not valid JSON, or the JSON does not contain
            a string at ``choices[0].message.content``.
    """
    if not _ENDPOINT:
        raise gr.Error(
            "MCCE_GPU_INFERENCE_ENDPOINT 시크릿이 설정되지 않았습니다. "
            "Spaces Settings → Secrets에서 엔드포인트를 주입하세요."
        )

    payload = {
        "model": _MODEL,
        "messages": _build_messages(message, history),
        "temperature": 0.7,
        "max_tokens": 512,
        "stream": False,
    }
    headers = {"Content-Type": "application/json"}
    if _API_KEY:
        headers["Authorization"] = f"Bearer {_API_KEY}"

    try:
        with httpx.Client(timeout=_TIMEOUT_SEC, follow_redirects=True) as client:
            resp = client.post(_ENDPOINT, json=payload, headers=headers)
            resp.raise_for_status()
    except httpx.TimeoutException as e:
        raise gr.Error(f"추론 타임아웃: {_TIMEOUT_SEC}s 초과") from e
    except httpx.HTTPStatusError as e:
        raise gr.Error(
            f"GPU 엔드포인트 오류: HTTP {e.response.status_code} "
            f"— {e.response.text[:200]}"
        ) from e
    except httpx.RequestError as e:
        raise gr.Error(f"네트워크 오류: {e}") from e

    # A 2xx response can still carry a non-JSON body (e.g. an HTML page from
    # a proxy). json.JSONDecodeError is a ValueError, so report it as a
    # format error instead of letting it escape as an unhandled exception.
    try:
        data = resp.json()
    except ValueError as e:
        raise gr.Error(f"응답 포맷 오류: {resp.text[:200]}") from e

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise gr.Error(f"응답 포맷 오류: {str(data)[:200]}") from e

    # The caller expects text; a null or structured content is a format error.
    if not isinstance(content, str):
        raise gr.Error(f"응답 포맷 오류: {str(data)[:200]}")
    return content


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------


def respond(message: str, history) -> str:
    """Chat callback: validate the input, query the GPU endpoint, return text.

    Problems are reported as ordinary chat replies instead of being raised,
    so the conversation stays usable after a failure.

    Args:
        message: Text typed by the user.
        history: Conversation history supplied by Gradio; ``None`` or empty
            is treated as no history.

    Returns:
        The model reply, or a short notice when the message is blank, the
        endpoint is not configured, or the inference call raised ``gr.Error``.
    """
    if not message or not message.strip():
        return "메시지를 입력해 주세요."
    if not _ENDPOINT:
        return (
            "⚠️ GPU 엔드포인트가 설정되지 않았습니다.\n"
            "관리자가 Space Settings → Secrets에 "
            "`MCCE_GPU_INFERENCE_ENDPOINT`를 추가해야 합니다."
        )
    try:
        return _call_gpu(message, history or [])
    except gr.Error as e:
        # gr.Error.__str__ returns repr(message), which would show the text
        # wrapped in quotes with escaped newlines. Use the raw message when
        # the attribute exists and fall back to str(e) otherwise.
        detail = getattr(e, "message", None) or str(e)
        return f"⚠️ {detail}"


# Title passed to gr.ChatInterface(title=...).
_TITLE = "MCCE Demo — Gemma 4 31B 실시간 추론"

# Markdown text passed to gr.ChatInterface(description=...). It is shown to
# end users, so the wording is kept in Korean.
_DESCRIPTION = """
**MCCE(MaMurS Compound Cognition Engine)** 실전 데모.

VPS GPU 서버(Gemma 4 31B-it, OpenAI-compatible)를 프록시하여
실시간 추론 결과를 보여줍니다.

> "ASI를 만들어서, 나처럼 가난한 사람들을 돕는 AI를 만들고 싶어." — 도규(DogYu)

**주의**: GPU 서버가 중지된 상태면 오류가 발생할 수 있습니다.
"""

# Sample prompts passed to gr.ChatInterface(examples=...).
_EXAMPLES = [
    "안녕하세요, 자기소개 부탁드립니다.",
    "한국의 기초생활수급 제도에 대해 알려주세요.",
    "파이썬으로 피보나치 수열을 작성해 주세요.",
    "당장 이번 달 월세가 없어요. 어떻게 해야 하나요?",
    "양자역학의 핵심 개념을 쉽게 설명해 주세요.",
]


def _build_demo():
    """Assemble the chat UI around ``respond`` with the module's title, description and examples."""
    ui_options = {
        "fn": respond,
        "title": _TITLE,
        "description": _DESCRIPTION,
        "examples": _EXAMPLES,
        # Examples are not pre-computed; each one is sent to the endpoint on click.
        "cache_examples": False,
        "theme": gr.themes.Soft(),
    }
    return gr.ChatInterface(**ui_options)


# Module-level app object; launched directly when the file is run as a script.
demo = _build_demo()


if __name__ == "__main__":
    demo.launch()