Spaces:

scvcoder
/

kpaa

Paused

App Files Community

scvcoder committed on May 2

Commit

1a36a19

verified ·

1 Parent(s): f22d865

MINIMAL TEST: Gradio + @spaces.GPU + demo.launch (HF ZeroGPU canonical pattern)

Browse files

Files changed (1) hide show

app.py +29 -112

app.py CHANGED Viewed

@@ -1,129 +1,46 @@
-"""KPAA Backend Space — OpenAI-compatible API on ZeroGPU.
-Key insight from Gradio 5.20 source (routes.py:1726-1750, node_server.py:74-95):
-- mount_gradio_app(..., ssr_mode=...) parameter takes precedence over GRADIO_SSR_MODE env var
-- HF Spaces seems to default GRADIO_SSR_MODE=True (we saw "with SSR ⚡" in earlier logs)
-- When ssr_mode=True, start_node_process() does s.bind((server_name, port)) which is the
-  source of "address already in use" on 7861
-Fix: pass explicit ssr_mode=False to mount_gradio_app — disables Node SSR subprocess
-without relying on env var override (which HF might pre-set).
-The lifespan hook from mount_gradio_app (routes.py:1758-1770) is what activates
-ZeroGPU detection. Manual mount without that hook → "No @spaces.GPU detected".
-We need both: hook present AND SSR disabled.
 """
-from __future__ import annotations
 import os
 import sys
 from pathlib import Path
-# Force-set (not setdefault) so we override any HF Spaces pre-set value.
-os.environ["GRADIO_SSR_MODE"] = "False"
-os.environ["GRADIO_SHARE"] = "False"
-os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
-# HF Spaces: src/ 를 sys.path 에 prepend
 sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
-# ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
-import gradio_client.utils as _gc_utils  # noqa: E402
-_orig_get_type = _gc_utils.get_type
-_orig_jstpt = _gc_utils._json_schema_to_python_type
-def _safe_get_type(schema):
-    if not isinstance(schema, dict):
-        return ""
-    return _orig_get_type(schema)
-def _safe_jstpt(schema, defs):
-    if not isinstance(schema, dict):
-        return "Any"
-    return _orig_jstpt(schema, defs)
-_gc_utils.get_type = _safe_get_type
-_gc_utils._json_schema_to_python_type = _safe_jstpt
-# ──────────────────────────────────────────────────────────────────────────
-# ─── ZeroGPU startup canary + diagnostics ─────────────────────────────────
-print(f"[kpaa-backend] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
-print(f"[kpaa-backend] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
-import spaces  # noqa: E402
-# Correct import path: Config lives at spaces.config.Config (top-level config module),
-# not spaces.zero.config. Wrapped in try so a future API change doesn't crash startup.
-try:
-    from spaces.config import Config as _spcfg
-    print(f"[kpaa-backend] spaces.Config.zero_gpu={_spcfg.zero_gpu}", flush=True)
-except Exception as e:
-    print(f"[kpaa-backend] could not read spaces.Config: {e!r}", flush=True)
-@spaces.GPU(duration=1)
-def _zerogpu_startup_canary() -> None:
-    return None
-print(
-    f"[kpaa-backend] canary 'zerogpu' attr: "
-    f"{getattr(_zerogpu_startup_canary, 'zerogpu', '<not set — decorator was no-op>')}",
-    flush=True,
-)
-# ──────────────────────────────────────────────────────────────────────────
-import gradio as gr  # noqa: E402
-from kpaa.server import create_app  # noqa: E402
-# Build FastAPI from KPAA's existing OpenAI-compatible server.
-fastapi_app = create_app()
-# Minimal Gradio Blocks for ZeroGPU detection + status page.
-with gr.Blocks(title="KPAA Backend") as demo:
-    gr.Markdown(
-        """
-        # 🧠 KPAA Backend
-        한국 개인정보보호법 RAG 추론 서버 (OpenAI 호환 API).
-        - `POST /v1/chat/completions`
-        - `GET  /v1/models`
-        - `GET  /healthz`, `/info`
-        UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.
-        """
-    )
-# Mount Gradio at /gradio. This adds a FastAPI lifespan hook that runs
-# blocks startup events (which activates ZeroGPU detection).
-# ssr_mode=False — explicit param overrides env var; prevents start_node_process
-# from being called and binding port 7861.
-final_app = gr.mount_gradio_app(
-    fastapi_app,
-    demo,
-    path="/gradio",
-    ssr_mode=False,
-)
-def main() -> None:
-    import uvicorn
-    port = int(os.environ.get("PORT", "7860"))
-    uvicorn.run(final_app, host="0.0.0.0", port=port)
 if __name__ == "__main__":
-    main()

+"""KPAA Backend Space — MINIMAL TEST.
+Goal: verify HF Spaces ZeroGPU activates with the absolute simplest pattern
+(Gradio Blocks + @spaces.GPU + demo.launch). Once this works, build KPAA on top.
 """
 import os
 import sys
 from pathlib import Path
+print(f"[diag] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
+print(f"[diag] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
+# Make src/ importable for later (when we add KPAA back).
 sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
+import spaces
+import gradio as gr
+import torch
+print(f"[diag] cuda_available={torch.cuda.is_available()}", flush=True)
+@spaces.GPU(duration=10)
+def echo(text: str) -> str:
+    """Trivial GPU function — proves ZeroGPU is wired."""
+    import torch
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    t = torch.tensor([1.0]).to(device)
+    return f"echo on {device}: {text} (sum={t.sum().item()})"
+with gr.Blocks(title="KPAA Backend (minimal)") as demo:
+    gr.Markdown("# KPAA Backend — minimal ZeroGPU test")
+    inp = gr.Textbox(label="input", value="hello")
+    out = gr.Textbox(label="output")
+    btn = gr.Button("run on GPU")
+    btn.click(echo, inputs=inp, outputs=out)
 if __name__ == "__main__":
+    demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=int(os.environ.get("PORT", "7860")),
+        ssr_mode=False,
+        show_api=False,
+    )