"""KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API.

Strategy validated via minimal test:
  - demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU.
  - mount_gradio_app + manual uvicorn does NOT activate ZeroGPU.

So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to
the underlying FastAPI app (demo.app) by adding them directly to
demo.app.routes. Routes added at runtime are picked up because Starlette
dispatches by traversing app.routes on each request.

Hardware: ZeroGPU (zero-a10g).
Required secret: LAW_OC.
"""
import os
import sys
import time
from pathlib import Path

# Startup diagnostics: log the Spaces-related environment variables. The value
# is formatted with repr, so an unset variable is printed as None.
print(f"[kpaa-backend] SPACES_ZERO_GPU={os.getenv('SPACES_ZERO_GPU')!r}", flush=True)
print(f"[kpaa-backend] SPACE_ID={os.getenv('SPACE_ID')!r}", flush=True)

# HF Spaces: put the sibling src/ directory first on sys.path.
sys.path.insert(0, str(Path(__file__).resolve().with_name("src")))


# ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
import gradio_client.utils as _gc_utils

# Keep handles to the library implementations; the wrappers below delegate to
# them for every schema that is a dict.
_orig_get_type = _gc_utils.get_type
_orig_jstpt = _gc_utils._json_schema_to_python_type


def _safe_get_type(schema):
    """Wrap ``gradio_client.utils.get_type`` so non-dict schemas do not reach it.

    Returns the original function's result for dict schemas and an empty
    string for anything else.
    """
    return _orig_get_type(schema) if isinstance(schema, dict) else ""


def _safe_jstpt(schema, defs):
    """Wrap ``gradio_client.utils._json_schema_to_python_type`` the same way.

    Returns the original function's result for dict schemas and the literal
    type name ``"Any"`` for anything else.
    """
    return _orig_jstpt(schema, defs) if isinstance(schema, dict) else "Any"


# Install the wrappers on the module so later lookups through
# ``gradio_client.utils`` resolve to the guarded versions.
_gc_utils.get_type = _safe_get_type
_gc_utils._json_schema_to_python_type = _safe_jstpt
# ──────────────────────────────────────────────────────────────────────────


import spaces
import gradio as gr


# ─── ZeroGPU canary wired to a Gradio event ───────────────────────────────
# Critical insight: HF detector requires @spaces.GPU functions to be wired
# to Gradio components, not standalone. So we keep `echo` as a real button
# handler in the status UI.
@spaces.GPU(duration=10)
def echo(text: str) -> str:
    """Echo *text* back, tagged with the device torch reports as available.

    Serves as the GPU diagnostic for the status UI: the tag is ``cuda`` when
    ``torch.cuda.is_available()`` is true and ``cpu`` otherwise.
    """
    # torch is imported inside the function rather than at module import time.
    import torch

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    return f"GPU echo ({device}): {text}"


# Status UI: a static Markdown description of the backend and its HTTP API,
# followed by a one-button GPU diagnostic. `demo` is the Blocks app that the
# __main__ section launches and whose FastAPI app (`demo.app`) is patched.
with gr.Blocks(title="KPAA Backend") as demo:
    # The Markdown text is user-facing content; it lists the API endpoints and
    # links to the separate UI Space.
    gr.Markdown(
        """
        # 🧠 KPAA Backend

        한국 개인정보보호법 RAG 추론 백엔드.

        ## API
        - `POST /v1/chat/completions`
        - `GET  /v1/models`
        - `GET  /healthz`

        UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.

        ---
        ### GPU 진단
        """
    )
    # Input and output boxes share one row with equal scale.
    with gr.Row():
        inp = gr.Textbox(label="입력", value="hello", scale=3)
        out = gr.Textbox(label="출력 (GPU 검증)", scale=3)
    btn = gr.Button("GPU echo 테스트")
    # Wiring `echo` to a real click event is what connects the @spaces.GPU
    # function to a Gradio component (see the note above `echo`).
    btn.click(echo, inputs=inp, outputs=out)


def _attach_kpaa_routes() -> None:
    """Copy the routes of the KPAA app onto the live Gradio FastAPI app.

    Builds the KPAA app with ``kpaa.server.create_app()`` and appends each of
    its routes to ``demo.app.routes``. Routes whose ``path`` is ``"/"`` or
    that have no ``path`` attribute are skipped. A summary line with the
    attached/skipped counts is printed.

    Must be called AFTER ``demo.launch()``, when ``demo.app`` is the running
    app.
    """
    from kpaa.server import create_app

    candidates = list(create_app().routes)
    # Leave out the KPAA root route and anything that has no path at all.
    mountable = [
        route for route in candidates
        if getattr(route, "path", None) not in ("/", None)
    ]
    demo.app.routes.extend(mountable)

    n_skipped = len(candidates) - len(mountable)
    print(f"[kpaa-backend] attached {len(mountable)} KPAA routes (skipped {n_skipped})", flush=True)


def _attach_split_view() -> None:
    """Serve the split-view page at both ``/`` and ``/split``.

    The page is ``kpaa.server._SPLIT_HTML`` (Open WebUI iframe plus a polled
    references pane) with only the iframe ``src`` rewritten from the local
    address to the UI Space URL. Both routes return the same HTML directly;
    there is no HTTP redirect.

    The routes are inserted at the FRONT of ``demo.app.routes`` so that they
    take priority over the ``/`` route Gradio registers. Must be called after
    ``demo.launch()``, because it mutates ``demo.app.routes``.
    """
    from fastapi.responses import HTMLResponse
    from fastapi.routing import APIRoute

    from kpaa.server import _SPLIT_HTML

    UI_SPACE_URL = "https://scvcoder-korean-privacy-ai-assistant.hf.space"
    local_src = 'src="http://localhost:8080/"'
    if local_src not in _SPLIT_HTML:
        # str.replace() is a silent no-op when the needle is absent; without
        # this message the page would be served with its src left unchanged
        # and nothing in the logs would say so.
        print(
            f"[kpaa-backend] WARNING: {local_src} not found in _SPLIT_HTML; "
            "split view iframe src was NOT rewritten",
            flush=True,
        )
    hf_html = _SPLIT_HTML.replace(local_src, f'src="{UI_SPACE_URL}"')

    # One handler backs both / and /split: it returns the same HTML and, on
    # every page load, resets the shared references state so that data left
    # over from a previous session is not shown in the references pane.
    async def _split_handler():
        import time as _time
        from kpaa.server import _last_refs

        _last_refs.update({
            "ts": _time.time(),
            "query": "",
            "intents": [],
            "jo_targets": [],
            "elapsed_ms": 0,
            "excerpts": [],
            "cited_citations": [],
            "llm_excerpt_citations": [],
            "geungeo_indices_in_answer": [],
        })
        return HTMLResponse(hf_html)

    # /split is the explicit alias (kept for backward compatibility). / is
    # inserted last so it ends up first in the list, ahead of Gradio's own /
    # route: opening the bare backend URL shows the split view. The Gradio
    # status UI is therefore no longer reachable at /; per the original note,
    # ZeroGPU detection is already satisfied by the module-level @spaces.GPU
    # canary.
    for path in ("/split", "/"):
        demo.app.routes.insert(
            0,
            APIRoute(path, _split_handler, methods=["GET"], include_in_schema=False),
        )

    print(f"[kpaa-backend] / and /split serve split HTML (UI iframe -> {UI_SPACE_URL})", flush=True)


if __name__ == "__main__":
    demo.queue()

    # prevent_thread_lock=True makes launch() return instead of blocking, so
    # demo.app can be patched below once it exists.
    listen_port = int(os.environ.get("PORT", "7860"))
    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        ssr_mode=False,
        show_api=False,
        prevent_thread_lock=True,
    )

    # demo.app is live from here on. Order matters: the KPAA routes are
    # appended first, then the split view is inserted in front of everything.
    for attach in (_attach_kpaa_routes, _attach_split_view):
        attach()
    print("[kpaa-backend] ready: Gradio at /, /v1/... API, /split (Open WebUI + 참고자료)", flush=True)

    # launch() did not block, so keep the main thread alive indefinitely.
    while True:
        time.sleep(60)