Spaces:

scvcoder
/

kpaa

Paused

App Files Files Community

scvcoder committed on May 2

Commit

617c061

verified ·

1 Parent(s): 1a36a19

Hybrid: demo.launch() (ZeroGPU works) + post-launch attach KPAA /v1 routes to demo.app

Browse files

Files changed (1) hide show

app.py +104 -18

app.py CHANGED Viewed

@@ -1,46 +1,132 @@
-"""KPAA Backend Space — MINIMAL TEST.
-Goal: verify HF Spaces ZeroGPU activates with the absolute simplest pattern
-(Gradio Blocks + @spaces.GPU + demo.launch). Once this works, build KPAA on top.
 """
 import os
 import sys
 from pathlib import Path
-print(f"[diag] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
-print(f"[diag] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
-# Make src/ importable for later (when we add KPAA back).
 sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
 import spaces
 import gradio as gr
-import torch
-print(f"[diag] cuda_available={torch.cuda.is_available()}", flush=True)
 @spaces.GPU(duration=10)
 def echo(text: str) -> str:
-    """Trivial GPU function — proves ZeroGPU is wired."""
     import torch
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    t = torch.tensor([1.0]).to(device)
-    return f"echo on {device}: {text} (sum={t.sum().item()})"
-with gr.Blocks(title="KPAA Backend (minimal)") as demo:
-    gr.Markdown("# KPAA Backend — minimal ZeroGPU test")
-    inp = gr.Textbox(label="input", value="hello")
-    out = gr.Textbox(label="output")
-    btn = gr.Button("run on GPU")
     btn.click(echo, inputs=inp, outputs=out)
 if __name__ == "__main__":
-    demo.queue().launch(
         server_name="0.0.0.0",
         server_port=int(os.environ.get("PORT", "7860")),
         ssr_mode=False,
         show_api=False,
     )

+"""KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API.
+Strategy validated via minimal test:
+  - demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU.
+  - mount_gradio_app + manual uvicorn does NOT activate ZeroGPU.
+So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to
+the underlying FastAPI (demo.app) by appending them to demo.app.routes
+(include_router is not used). Routes added at runtime are picked up because
+Starlette dispatches by traversing app.routes on each request.
+Hardware: ZeroGPU (zero-a10g).
+Required secret: LAW_OC.
 """
 import os
 import sys
+import time
 from pathlib import Path
+print(f"[kpaa-backend] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
+print(f"[kpaa-backend] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
+# HF Spaces: src/ on sys.path
 sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
+# ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
+# Wraps two gradio_client.utils helpers so that a non-dict schema is answered
+# with a placeholder string instead of being passed to the original helper.
+# NOTE(review): presumably targets boolean JSON-schema nodes (e.g.
+# `additionalProperties: true`) that break the stock helpers — confirm
+# against the installed gradio_client version.
+import gradio_client.utils as _gc_utils
+# Keep references to the originals so the wrappers can delegate to them.
+_orig_get_type = _gc_utils.get_type
+_orig_jstpt = _gc_utils._json_schema_to_python_type
+def _safe_get_type(schema):
+    # Non-dict schema: return an empty string rather than calling the original.
+    if not isinstance(schema, dict):
+        return ""
+    return _orig_get_type(schema)
+def _safe_jstpt(schema, defs):
+    # Non-dict schema: report the type as "Any" rather than calling the original.
+    if not isinstance(schema, dict):
+        return "Any"
+    return _orig_jstpt(schema, defs)
+# Install the wrappers on the module; this runs before `gradio` is imported below.
+_gc_utils.get_type = _safe_get_type
+_gc_utils._json_schema_to_python_type = _safe_jstpt
+# ──────────────────────────────────────────────────────────────────────────
 import spaces
 import gradio as gr
+# ─── ZeroGPU canary wired to a Gradio event ───────────────────────────────
+# Author's finding: the HF ZeroGPU detector requires @spaces.GPU functions to
+# be wired to Gradio components, not standalone. `echo` is therefore kept as
+# a real button handler in the status UI. It returns the input text tagged
+# with "cuda" or "cpu", depending on what torch.cuda.is_available() reports.
 @spaces.GPU(duration=10)
 def echo(text: str) -> str:
     import torch
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    return f"GPU echo ({device}): {text}"
+# Status page: a Markdown description of the backend plus a small GPU check.
+with gr.Blocks(title="KPAA Backend") as demo:
+    # The Markdown text (Korean) describes the backend, lists the API paths
+    # /v1/chat/completions, /v1/models and /healthz, and links to the Space
+    # that provides the UI.
+    gr.Markdown(
+        """
+        # 🧠 KPAA Backend
+        한국 개인정보보호법 RAG 추론 백엔드.
+        ## API
+        - `POST /v1/chat/completions`
+        - `GET  /v1/models`
+        - `GET  /healthz`
+        UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.
+        ---
+        ### GPU 진단
+        """
+    )
+    # Input and output text boxes in one row, followed by the trigger button.
+    with gr.Row():
+        inp = gr.Textbox(label="입력", value="hello", scale=3)
+        out = gr.Textbox(label="출력 (GPU 검증)", scale=3)
+    btn = gr.Button("GPU echo 테스트")
+    # Clicking the button runs the @spaces.GPU-decorated `echo` on the input text.
     btn.click(echo, inputs=inp, outputs=out)
+def _attach_kpaa_routes() -> None:
+    """Attach the KPAA app's routes to the Gradio FastAPI app (demo.app).
+    Meant to be called AFTER demo.launch() — per the original note, demo.app
+    is the live Gradio FastAPI by then.
+    Builds the KPAA app with create_app() and appends each of its route
+    objects to demo.app.routes; include_router is NOT used. Routes whose
+    path is "/" or that have no `path` attribute are skipped. Prints the
+    added/skipped counts.
+    NOTE(review): appended routes sit after Gradio's own in the route list —
+    confirm no earlier Gradio route shadows the KPAA paths.
+    """
+    # Function-scope import: this file does not import kpaa.server at module load.
+    from kpaa.server import create_app
+    kpaa_app = create_app()
+    # Instead of FastAPI's `include_router` (which re-registers each route on
+    # the parent), iterate kpaa_app.routes and append to demo.app.routes.
+    n_added = 0
+    skipped = 0
+    for route in kpaa_app.routes:
+        # Skip routes that would conflict with Gradio (e.g., '/') and route
+        # objects that expose no `path` attribute at all.
+        path = getattr(route, "path", None)
+        if path in ("/", None):
+            skipped += 1
+            continue
+        demo.app.routes.append(route)
+        n_added += 1
+    print(f"[kpaa-backend] attached {n_added} KPAA routes (skipped {skipped})", flush=True)
 if __name__ == "__main__":
+    # Launch Gradio without blocking the main thread (prevent_thread_lock=True
+    # below) so that demo.app can be patched after launch() returns.
+    demo.queue()
+    demo.launch(
         server_name="0.0.0.0",
         server_port=int(os.environ.get("PORT", "7860")),
         ssr_mode=False,
         show_api=False,
+        prevent_thread_lock=True,
     )
+    # demo.app is now a live Starlette/FastAPI app — attach KPAA routes.
+    _attach_kpaa_routes()
+    print("[kpaa-backend] ready: Gradio at /, KPAA OpenAI routes at /v1/...", flush=True)
+    # Keep the main thread alive forever: with prevent_thread_lock=True the
+    # Gradio server runs on a background thread, so the script must not exit.
+    while True:
+        time.sleep(60)