kpaa / app_backend.py
scvcoder's picture
Cleanup: dead code, route deletion (/info, /chat, /api/*), comment polish, auth mode docs, URL rename
9344f01 verified
"""KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API.
Strategy validated via minimal test:
- demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU.
- mount_gradio_app + manual uvicorn does NOT activate ZeroGPU.
So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to
the underlying FastAPI app (demo.app) by adding the route objects directly
to demo.app.routes. Routes added at runtime are picked up because Starlette
dispatches by traversing app.routes on each request.
Hardware: ZeroGPU (zero-a10g).
Required secret: LAW_OC.
"""
import os
import sys
import time
from pathlib import Path
# Log the Spaces environment markers once at startup so the container logs
# show whether the ZeroGPU / Space variables are set.
for _env_key in ("SPACES_ZERO_GPU", "SPACE_ID"):
    print(f"[kpaa-backend] {_env_key}={os.environ.get(_env_key)!r}", flush=True)

# HF Spaces: put the src/ directory next to this file at the front of
# sys.path (presumably where the `kpaa` package lives — confirm).
_src_dir = Path(__file__).resolve().parent / "src"
sys.path.insert(0, str(_src_dir))
# ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
import gradio_client.utils as _gc_utils
# Keep handles to the stock gradio_client helpers so the guarded wrappers
# below can delegate to them for well-formed (dict) schemas.
_orig_get_type = _gc_utils.get_type
_orig_jstpt = _gc_utils._json_schema_to_python_type


def _safe_get_type(schema):
    """Return the stock ``get_type`` result, or ``""`` for a non-dict schema.

    The original helper is only invoked when ``schema`` is a dict; any other
    value (presumably a boolean sub-schema — confirm) yields an empty string
    instead of reaching the stock implementation.
    """
    return _orig_get_type(schema) if isinstance(schema, dict) else ""


def _safe_jstpt(schema, defs):
    """Return the stock schema-to-type result, or ``"Any"`` for a non-dict schema.

    ``defs`` is forwarded untouched to the original
    ``_json_schema_to_python_type`` when ``schema`` is a dict.
    """
    return _orig_jstpt(schema, defs) if isinstance(schema, dict) else "Any"


# Install the guarded versions in place of the stock helpers.
_gc_utils.get_type = _safe_get_type
_gc_utils._json_schema_to_python_type = _safe_jstpt
# ──────────────────────────────────────────────────────────────────────────
import spaces
import gradio as gr
# ─── ZeroGPU canary wired to a Gradio event ───────────────────────────────
# Critical insight: HF detector requires @spaces.GPU functions to be wired
# to Gradio components, not standalone. So we keep `echo` as a real button
# handler in the status UI.
@spaces.GPU(duration=10)
def echo(text: str) -> str:
    """Echo ``text`` back, prefixed with the device torch reports.

    Serves as the ZeroGPU canary: the returned string names ``cuda`` when
    ``torch.cuda.is_available()`` is true and ``cpu`` otherwise.
    """
    # torch is imported lazily, inside the GPU-decorated call.
    import torch

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    return f"GPU echo ({device}): {text}"
# Markdown rendered at the top of the status page (user-facing text).
_STATUS_PAGE_MD = """
# 🧠 KPAA Backend
한국 개인정보보호법 RAG 추론 백엔드.
## API
- `POST /v1/chat/completions`
- `GET /v1/models`
- `GET /healthz`
UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.
---
### GPU 진단
"""

# Status UI: a short description plus a GPU diagnostic row.
with gr.Blocks(title="KPAA Backend") as demo:
    gr.Markdown(_STATUS_PAGE_MD)
    with gr.Row():
        inp = gr.Textbox(value="hello", label="입력", scale=3)
        out = gr.Textbox(label="출력 (GPU 검증)", scale=3)
        btn = gr.Button("GPU echo 테스트")
    # The @spaces.GPU canary is attached to a real component event here.
    btn.click(fn=echo, inputs=inp, outputs=out)
def _attach_kpaa_routes() -> None:
    """Copy the KPAA app's routes onto the running Gradio FastAPI app.

    Intended to be called after ``demo.launch()``, once ``demo.app`` exists.
    Builds the KPAA app with ``kpaa.server.create_app()`` and appends each of
    its route objects to ``demo.app.routes``. Routes whose ``path`` is ``"/"``
    or that have no ``path`` attribute are left out. Only route objects are
    copied; nothing else from the KPAA app is transferred here.
    """
    from kpaa.server import create_app

    candidates = list(create_app().routes)
    # Drop the root route and anything without a path attribute.
    attachable = [
        candidate
        for candidate in candidates
        if getattr(candidate, "path", None) not in ("/", None)
    ]
    demo.app.routes.extend(attachable)

    attached_count = len(attachable)
    skipped_count = len(candidates) - attached_count
    print(
        f"[kpaa-backend] attached {attached_count} KPAA routes (skipped {skipped_count})",
        flush=True,
    )
def _attach_split_view() -> None:
    """Serve the split-view page (Open WebUI iframe + references panel).

    Reuses ``_SPLIT_HTML`` from ``kpaa.server`` with the iframe ``src``
    rewritten from the local address to the UI Space URL. The same handler is
    registered for both ``/`` and ``/split``; the routes are inserted at the
    *front* of ``demo.app.routes`` so they are matched before Gradio's own
    ``/`` route. No redirect is involved: ``/`` returns the HTML directly.

    Intended to be called after ``demo.launch()``, once ``demo.app`` exists.
    """
    from fastapi.responses import HTMLResponse
    from fastapi.routing import APIRoute
    from kpaa.server import _SPLIT_HTML

    UI_SPACE_URL = "https://scvcoder-korean-privacy-ai-assistant.hf.space"
    local_iframe_src = 'src="http://localhost:8080/"'

    # str.replace is a silent no-op when the placeholder is missing, which
    # would leave the page pointing at localhost. Make that visible in the logs.
    if local_iframe_src not in _SPLIT_HTML:
        print(
            "[kpaa-backend] WARNING: iframe placeholder not found in _SPLIT_HTML; "
            "split view keeps its original iframe src",
            flush=True,
        )
    hf_html = _SPLIT_HTML.replace(local_iframe_src, f'src="{UI_SPACE_URL}"')

    # One handler backs both / and /split: same HTML, and the references
    # state is reset on every page load so data left over from a previous
    # session is not shown.
    async def _split_handler():
        # Imported per request, as in the original handler.
        from kpaa.server import _last_refs

        _last_refs.update({
            "ts": time.time(),
            "query": "",
            "intents": [],
            "jo_targets": [],
            "elapsed_ms": 0,
            "excerpts": [],
            "cited_citations": [],
            "llm_excerpt_citations": [],
            "geungeo_indices_in_answer": [],
        })
        return HTMLResponse(hf_html)

    # /split is kept as an explicit alias (backward compatibility). "/" is
    # inserted last so it ends up first in the list, ahead of Gradio's "/".
    # The Gradio status UI is therefore no longer reachable at "/"; per the
    # original author's note, ZeroGPU detection is already satisfied by the
    # module-level @spaces.GPU canary.
    for route_path in ("/split", "/"):
        demo.app.routes.insert(
            0,
            APIRoute(route_path, _split_handler, methods=["GET"], include_in_schema=False),
        )
    print(f"[kpaa-backend] / and /split serve split HTML (UI iframe -> {UI_SPACE_URL})", flush=True)
if __name__ == "__main__":
    # Launch Gradio without blocking the main thread (prevent_thread_lock)
    # so demo.app can be patched afterwards.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        ssr_mode=False,
        show_api=False,
        prevent_thread_lock=True,
    )
    # demo.app is now the live FastAPI app: attach KPAA routes, then the
    # split view (which takes over "/" ahead of Gradio's own route).
    _attach_kpaa_routes()
    _attach_split_view()
    # "/" serves the split view at this point, not the Gradio status UI,
    # so the ready message reports that.
    print("[kpaa-backend] ready: / and /split (Open WebUI + 참고자료), /v1/... API", flush=True)
    # Keep the main thread alive; the server runs on a background thread.
    while True:
        time.sleep(60)