| """KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API. |
| |
| Strategy validated via minimal test: |
| - demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU. |
| - mount_gradio_app + manual uvicorn does NOT activate ZeroGPU. |
| |
| So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to |
| the underlying FastAPI (demo.app) by appending them to demo.app.routes. |
| Routes added at runtime are picked up because Starlette dispatches by |
| traversing app.routes on each request. |
| |
| Hardware: ZeroGPU (zero-a10g). |
| Required secret: LAW_OC. |
| """ |
| import os |
| import sys |
| import time |
| from pathlib import Path |
|
|
# Startup diagnostics: record the Spaces-related environment values this
# process was started with (printed as repr, so an unset variable shows None).
print("[kpaa-backend] SPACES_ZERO_GPU=" + repr(os.environ.get("SPACES_ZERO_GPU")), flush=True)
print("[kpaa-backend] SPACE_ID=" + repr(os.environ.get("SPACE_ID")), flush=True)

# Put the ``src`` directory that sits next to this file first on the import
# path (presumably where the ``kpaa`` package imported further down lives).
sys.path.insert(0, str(Path(__file__).resolve().parent.joinpath("src")))
|
|
|
|
| |
| import gradio_client.utils as _gc_utils |
|
|
# Keep handles to the unmodified gradio_client helpers; the guarded wrappers
# in this module delegate to them whenever the schema is a dict.
_orig_jstpt = _gc_utils._json_schema_to_python_type
_orig_get_type = _gc_utils.get_type
|
|
|
|
def _safe_get_type(schema):
    """Guarded stand-in for ``gradio_client.utils.get_type``.

    Returns the empty string for any ``schema`` that is not a ``dict``;
    dict schemas are forwarded unchanged to the saved original function.
    """
    # NOTE(review): presumably works around gradio_client failing on
    # non-dict (e.g. boolean) sub-schemas — confirm against the pinned version.
    return _orig_get_type(schema) if isinstance(schema, dict) else ""
|
|
|
|
def _safe_jstpt(schema, defs):
    """Guarded stand-in for ``gradio_client.utils._json_schema_to_python_type``.

    Dict schemas are forwarded, together with ``defs``, to the saved original
    function. Anything else is reported as the type name ``"Any"`` without
    calling the original.
    """
    if isinstance(schema, dict):
        return _orig_jstpt(schema, defs)
    return "Any"
|
|
|
|
# Swap the guarded wrappers into gradio_client.utils in place of the originals.
# NOTE(review): this runs before `import gradio` below — presumably so gradio
# only ever sees the patched attributes; confirm the ordering is required.
_gc_utils._json_schema_to_python_type = _safe_jstpt
_gc_utils.get_type = _safe_get_type
| |
|
|
|
|
| import spaces |
| import gradio as gr |
|
|
|
|
| |
| |
| |
| |
@spaces.GPU(duration=10)
def echo(text: str) -> str:
    """Return ``text`` prefixed with the device torch reports as available.

    Used by the diagnostic button in the UI: the reply names ``cuda`` when
    ``torch.cuda.is_available()`` is true and ``cpu`` otherwise.
    """
    # torch is imported inside the function rather than at module import time.
    # NOTE(review): presumably so CUDA is only touched inside the
    # ``spaces.GPU``-decorated call — confirm against the `spaces` docs.
    import torch

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    return f"GPU echo ({device}): {text}"
|
|
|
|
# Minimal Gradio UI: a landing note listing the HTTP API plus a small
# "GPU echo" form that calls ``echo``.
with gr.Blocks(title="KPAA Backend") as demo:
    # The Markdown text is kept flush-left exactly as it appears in the source.
    gr.Markdown(
        """
# 🧠 KPAA Backend

한국 개인정보보호법 RAG 추론 백엔드.

## API
- `POST /v1/chat/completions`
- `GET /v1/models`
- `GET /healthz`

UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.

---
### GPU 진단
"""
    )

    # Input and output boxes share one row.
    with gr.Row():
        inp = gr.Textbox(label="입력", value="hello", scale=3)
        out = gr.Textbox(label="출력 (GPU 검증)", scale=3)

    # NOTE(review): the button is placed below the row here; confirm this
    # matches the intended layout (it could also belong inside the row).
    btn = gr.Button("GPU echo 테스트")
    btn.click(fn=echo, inputs=inp, outputs=out)
|
|
|
|
def _attach_kpaa_routes() -> None:
    """Copy the KPAA app's routes onto the running Gradio FastAPI app.

    Builds the KPAA application with ``kpaa.server.create_app()`` and appends
    its route objects to the END of ``demo.app.routes``. Entries whose
    ``path`` is ``"/"``, or that expose no ``path`` attribute at all, are
    skipped. Only route objects are transferred; nothing else configured on
    the KPAA app is copied by this function.

    Meant to be called after ``demo.launch()``, once ``demo.app`` is the live
    FastAPI instance. Prints a one-line summary of added/skipped counts.
    """
    from kpaa.server import create_app

    kpaa_app = create_app()
    source_routes = kpaa_app.routes

    # Keep every route except the root path and path-less entries.
    accepted = [
        candidate
        for candidate in source_routes
        if getattr(candidate, "path", None) not in ("/", None)
    ]
    demo.app.routes.extend(accepted)

    n_added = len(accepted)
    skipped = len(source_routes) - n_added
    print(f"[kpaa-backend] attached {n_added} KPAA routes (skipped {skipped})", flush=True)
|
|
|
|
def _attach_split_view() -> None:
    """Serve the split-view page (Open WebUI iframe + reference polling).

    The page is ``kpaa.server._SPLIT_HTML`` reused as-is, except that the
    iframe ``src`` is rewritten from the local Open WebUI address to the
    hosted UI Space URL. If the local address is not present in the template,
    a warning is printed and the template is served unchanged.

    Two GET routes are registered, ``/split`` and ``/``, both answered by the
    same handler (``/`` serves the page directly; it does not redirect).
    Gradio already owns ``/``, so both routes are inserted at the FRONT of
    ``demo.app.routes`` to be matched before Gradio's own routes.

    Every page load also resets ``kpaa.server._last_refs`` to an empty
    reference payload stamped with the current time.

    Meant to be called after ``demo.launch()``, once ``demo.app`` exists.
    """
    from fastapi.responses import HTMLResponse
    from fastapi.routing import APIRoute

    from kpaa.server import _SPLIT_HTML

    UI_SPACE_URL = "https://scvcoder-korean-privacy-ai-assistant.hf.space"
    local_iframe_src = 'src="http://localhost:8080/"'
    if local_iframe_src not in _SPLIT_HTML:
        # str.replace would silently do nothing here and the iframe would keep
        # whatever address the template carries, so make the mismatch visible.
        print(
            "[kpaa-backend] WARNING: iframe placeholder "
            f"{local_iframe_src!r} not found in _SPLIT_HTML; "
            "split view iframe src left unchanged",
            flush=True,
        )
    hf_html = _SPLIT_HTML.replace(local_iframe_src, f'src="{UI_SPACE_URL}"')

    async def _split_handler():
        # Looked up on every request (as before) so the handler always acts on
        # whatever object kpaa.server currently exposes as _last_refs.
        from kpaa.server import _last_refs

        # Clear the previously recorded references for a freshly loaded page.
        _last_refs.update({
            "ts": time.time(),
            "query": "",
            "intents": [],
            "jo_targets": [],
            "elapsed_ms": 0,
            "excerpts": [],
            "cited_citations": [],
            "llm_excerpt_citations": [],
            "geungeo_indices_in_answer": [],
        })
        return HTMLResponse(hf_html)

    # Each insert goes to index 0, so "/" ends up first and "/split" second,
    # both ahead of every route that was already registered.
    for route_path in ("/split", "/"):
        demo.app.routes.insert(
            0,
            APIRoute(route_path, _split_handler, methods=["GET"], include_in_schema=False),
        )

    print(f"[kpaa-backend] / and /split serve split HTML (UI iframe -> {UI_SPACE_URL})", flush=True)
|
|
|
|
if __name__ == "__main__":
    # Enable Gradio's queue, then start Gradio's own server. launch() is asked
    # not to block (prevent_thread_lock=True) so the statements below can run.
    demo.queue()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", "7860")),
        "ssr_mode": False,
        "show_api": False,
        "prevent_thread_lock": True,
    }
    demo.launch(**launch_options)

    # demo.app is live now: add the KPAA API routes, then the split view.
    _attach_kpaa_routes()
    _attach_split_view()
    # NOTE(review): _attach_split_view registers its own handler for "/", so
    # the "Gradio at /" wording in this message may be outdated — confirm.
    print("[kpaa-backend] ready: Gradio at /, /v1/... API, /split (Open WebUI + 참고자료)", flush=True)

    # launch() returned immediately, so keep the main thread alive indefinitely.
    while True:
        time.sleep(60)
|
|