"""KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API.

Strategy validated via minimal test:
  - demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU.
  - mount_gradio_app + manual uvicorn does NOT activate ZeroGPU.

So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to the
underlying FastAPI app (demo.app) by adding them to demo.app.routes. Routes
added at runtime are picked up because Starlette dispatches by traversing
app.routes on each request.

Hardware: ZeroGPU (zero-a10g).
Required secret: LAW_OC.
"""

import os
import sys
import time
from pathlib import Path

# Startup diagnostics: show the Spaces-related environment this process sees.
for _env_name in ("SPACES_ZERO_GPU", "SPACE_ID"):
    print(f"[kpaa-backend] {_env_name}={os.environ.get(_env_name)!r}", flush=True)

# HF Spaces: put the src/ directory that sits next to this file on sys.path.
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))

# ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
import gradio_client.utils as _gc_utils

# Keep the original implementations so the wrappers below can delegate.
_orig_get_type = _gc_utils.get_type
_orig_jstpt = _gc_utils._json_schema_to_python_type


def _safe_get_type(schema):
    """Delegate to the original ``get_type`` for dicts; otherwise return ""."""
    # NOTE(review): presumably guards against non-dict schema nodes (e.g. a
    # bare boolean) reaching the original helper — confirm against the
    # installed gradio_client version.
    return _orig_get_type(schema) if isinstance(schema, dict) else ""


def _safe_jstpt(schema, defs):
    """Delegate to the original converter for dicts; otherwise return "Any"."""
    return _orig_jstpt(schema, defs) if isinstance(schema, dict) else "Any"


_gc_utils.get_type = _safe_get_type
_gc_utils._json_schema_to_python_type = _safe_jstpt
# ──────────────────────────────────────────────────────────────────────────

import spaces
import gradio as gr

# ─── ZeroGPU canary wired to a Gradio event ───────────────────────────────
# Critical insight: HF detector requires @spaces.GPU functions to be wired
# to Gradio components, not standalone. So we keep `echo` as a real button
# handler in the status UI.
@spaces.GPU(duration=10)
def echo(text: str) -> str:
    """Return *text* prefixed with the device torch reports (GPU canary).

    Args:
        text: Arbitrary text to echo back.

    Returns:
        ``"GPU echo (<device>): <text>"`` where ``<device>`` is ``"cuda"`` when
        ``torch.cuda.is_available()`` is true and ``"cpu"`` otherwise.
    """
    # torch is imported lazily, inside the @spaces.GPU-decorated call.
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"GPU echo ({device}): {text}"


# Status UI. Its button is wired to `echo`, the @spaces.GPU canary.
with gr.Blocks(title="KPAA Backend") as demo:
    gr.Markdown(
        """
        # 🧠 KPAA Backend

        한국 개인정보보호법 RAG 추론 백엔드.

        ## API
        - `POST /v1/chat/completions`
        - `GET /v1/models`
        - `GET /healthz`

        UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.

        ---

        ### GPU 진단
        """
    )
    with gr.Row():
        inp = gr.Textbox(label="입력", value="hello", scale=3)
        out = gr.Textbox(label="출력 (GPU 검증)", scale=3)
    btn = gr.Button("GPU echo 테스트")
    btn.click(echo, inputs=inp, outputs=out)


def _attach_kpaa_routes() -> None:
    """Append the KPAA app's routes to the live Gradio FastAPI app.

    Must be called AFTER ``demo.launch()``: ``demo.app`` is the live Gradio
    FastAPI app by then. Routes whose ``path`` is ``"/"`` or missing are
    skipped; every other route object of ``create_app()`` is appended to
    ``demo.app.routes``.

    Only the route objects are copied; nothing else configured on the KPAA
    app object itself is transferred by this function.
    """
    from kpaa.server import create_app

    kpaa_app = create_app()
    n_added = 0
    skipped = 0
    for route in kpaa_app.routes:
        path = getattr(route, "path", None)
        # Leave "/" alone (and anything without a path attribute).
        if path in ("/", None):
            skipped += 1
            continue
        demo.app.routes.append(route)
        n_added += 1
    print(f"[kpaa-backend] attached {n_added} KPAA routes (skipped {skipped})", flush=True)


def _attach_split_view() -> None:
    """Serve the split-view page (Open WebUI iframe + references) on `/` and `/split`.

    Reuses KPAA's local ``_SPLIT_HTML`` unchanged except that the iframe
    ``src`` is pointed at the UI Space URL. The same handler is registered for
    both paths (there is no redirect). Both routes are inserted at the FRONT
    of ``demo.app.routes`` so they take priority over Gradio's own `/` route.

    Must be called AFTER ``demo.launch()``.
    """
    from fastapi.responses import HTMLResponse
    from fastapi.routing import APIRoute
    from kpaa.server import _SPLIT_HTML

    UI_SPACE_URL = "https://scvcoder-korean-privacy-ai-assistant.hf.space"
    local_iframe_src = 'src="http://localhost:8080/"'
    if local_iframe_src not in _SPLIT_HTML:
        # str.replace would silently do nothing, leaving the page unchanged;
        # surface that in the logs instead of failing silently.
        print(
            f"[kpaa-backend] WARNING: {local_iframe_src!r} not found in _SPLIT_HTML; "
            "split view iframe src was NOT rewritten",
            flush=True,
        )
    hf_html = _SPLIT_HTML.replace(local_iframe_src, f'src="{UI_SPACE_URL}"')

    # One handler routed on both /split and / — same HTML, and on every page
    # load the reference data is reset so leftovers from a previous session
    # are not shown.
    async def _split_handler():
        # Looked up on every request, exactly as before, so the handler always
        # updates whatever object kpaa.server currently exposes.
        from kpaa.server import _last_refs

        _last_refs.update({
            "ts": time.time(),
            "query": "",
            "intents": [],
            "jo_targets": [],
            "elapsed_ms": 0,
            "excerpts": [],
            "cited_citations": [],
            "llm_excerpt_citations": [],
            "geungeo_indices_in_answer": [],
        })
        return HTMLResponse(hf_html)

    # "/split" is the explicit alias (backward compatibility). "/" is inserted
    # ahead of Gradio's "/" so that visiting the bare backend URL shows the
    # split view directly. The Gradio status UI is no longer exposed, but
    # ZeroGPU detection is already satisfied by the module-level @spaces.GPU
    # canary. Inserting at index 0 in this order leaves "/" first.
    for path in ("/split", "/"):
        demo.app.routes.insert(
            0,
            APIRoute(path, _split_handler, methods=["GET"], include_in_schema=False),
        )
    print(f"[kpaa-backend] / and /split serve split HTML (UI iframe -> {UI_SPACE_URL})", flush=True)


if __name__ == "__main__":
    # Launch Gradio in a non-blocking way so we can patch demo.app afterwards.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        ssr_mode=False,
        show_api=False,
        prevent_thread_lock=True,
    )

    # demo.app is now a live Starlette/FastAPI app — attach KPAA routes + split view.
    _attach_kpaa_routes()
    _attach_split_view()

    # "/" is served by the split view (inserted ahead of Gradio's own "/").
    print("[kpaa-backend] ready: split view at / and /split (Open WebUI + 참고자료), /v1/... API", flush=True)

    # Block forever (Gradio runs on background thread).
    while True:
        time.sleep(60)