scvcoder committed on
Commit
1a36a19
·
verified ·
1 Parent(s): f22d865

MINIMAL TEST: Gradio + @spaces.GPU + demo.launch (HF ZeroGPU canonical pattern)

Browse files
Files changed (1) hide show
  1. app.py +29 -112
app.py CHANGED
@@ -1,129 +1,46 @@
1
- """KPAA Backend Space — OpenAI-compatible API on ZeroGPU.
2
 
3
- Key insight from Gradio 5.20 source (routes.py:1726-1750, node_server.py:74-95):
4
- - mount_gradio_app(..., ssr_mode=...) parameter takes precedence over GRADIO_SSR_MODE env var
5
- - HF Spaces seems to default GRADIO_SSR_MODE=True (we saw "with SSR ⚡" in earlier logs)
6
- - When ssr_mode=True, start_node_process() does s.bind((server_name, port)) which is the
7
- source of "address already in use" on 7861
8
-
9
- Fix: pass explicit ssr_mode=False to mount_gradio_app — disables Node SSR subprocess
10
- without relying on env var override (which HF might pre-set).
11
-
12
- The lifespan hook from mount_gradio_app (routes.py:1758-1770) is what activates
13
- ZeroGPU detection. Manual mount without that hook → "No @spaces.GPU detected".
14
- We need both: hook present AND SSR disabled.
15
  """
16
- from __future__ import annotations
17
-
18
  import os
19
  import sys
20
  from pathlib import Path
21
 
22
- # Force-set (not setdefault) so we override any HF Spaces pre-set value.
23
- os.environ["GRADIO_SSR_MODE"] = "False"
24
- os.environ["GRADIO_SHARE"] = "False"
25
- os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
26
 
27
- # HF Spaces: src/ sys.path prepend
28
  sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
29
 
 
 
 
30
 
31
- # ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
32
- import gradio_client.utils as _gc_utils # noqa: E402
33
-
34
- _orig_get_type = _gc_utils.get_type
35
- _orig_jstpt = _gc_utils._json_schema_to_python_type
36
-
37
-
38
- def _safe_get_type(schema):
39
- if not isinstance(schema, dict):
40
- return ""
41
- return _orig_get_type(schema)
42
-
43
-
44
- def _safe_jstpt(schema, defs):
45
- if not isinstance(schema, dict):
46
- return "Any"
47
- return _orig_jstpt(schema, defs)
48
-
49
-
50
- _gc_utils.get_type = _safe_get_type
51
- _gc_utils._json_schema_to_python_type = _safe_jstpt
52
- # ──────────────────────────────────────────────────────────────────────────
53
-
54
-
55
- # ─── ZeroGPU startup canary + diagnostics ─────────────────────────────────
56
- print(f"[kpaa-backend] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
57
- print(f"[kpaa-backend] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
58
-
59
- import spaces # noqa: E402
60
-
61
- # Correct import path: Config lives at spaces.config.Config (top-level config module),
62
- # not spaces.zero.config. Wrapped in try so a future API change doesn't crash startup.
63
- try:
64
- from spaces.config import Config as _spcfg
65
- print(f"[kpaa-backend] spaces.Config.zero_gpu={_spcfg.zero_gpu}", flush=True)
66
- except Exception as e:
67
- print(f"[kpaa-backend] could not read spaces.Config: {e!r}", flush=True)
68
-
69
-
70
- @spaces.GPU(duration=1)
71
- def _zerogpu_startup_canary() -> None:
72
- return None
73
 
74
 
75
- print(
76
- f"[kpaa-backend] canary 'zerogpu' attr: "
77
- f"{getattr(_zerogpu_startup_canary, 'zerogpu', '<not set decorator was no-op>')}",
78
- flush=True,
79
- )
80
- # ──────────────────────────────────────────────────────────────────────────
 
81
 
82
 
83
- import gradio as gr # noqa: E402
84
-
85
- from kpaa.server import create_app # noqa: E402
86
-
87
-
88
- # Build FastAPI from KPAA's existing OpenAI-compatible server.
89
- fastapi_app = create_app()
90
-
91
-
92
- # Minimal Gradio Blocks for ZeroGPU detection + status page.
93
- with gr.Blocks(title="KPAA Backend") as demo:
94
- gr.Markdown(
95
- """
96
- # 🧠 KPAA Backend
97
-
98
- 한국 개인정보보호법 RAG 추론 서버 (OpenAI 호환 API).
99
-
100
- - `POST /v1/chat/completions`
101
- - `GET /v1/models`
102
- - `GET /healthz`, `/info`
103
-
104
- UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.
105
- """
106
- )
107
-
108
-
109
- # Mount Gradio at /gradio. This adds a FastAPI lifespan hook that runs
110
- # blocks startup events (which activates ZeroGPU detection).
111
- # ssr_mode=False — explicit param overrides env var; prevents start_node_process
112
- # from being called and binding port 7861.
113
- final_app = gr.mount_gradio_app(
114
- fastapi_app,
115
- demo,
116
- path="/gradio",
117
- ssr_mode=False,
118
- )
119
-
120
-
121
- def main() -> None:
122
- import uvicorn
123
-
124
- port = int(os.environ.get("PORT", "7860"))
125
- uvicorn.run(final_app, host="0.0.0.0", port=port)
126
 
127
 
128
  if __name__ == "__main__":
129
- main()
 
 
 
 
 
 
1
+ """KPAA Backend Space — MINIMAL TEST.
2
 
3
+ Goal: verify HF Spaces ZeroGPU activates with the absolute simplest pattern
4
+ (Gradio Blocks + @spaces.GPU + demo.launch). Once this works, build KPAA on top.
 
 
 
 
 
 
 
 
 
 
5
  """
 
 
6
  import os
7
  import sys
8
  from pathlib import Path
9
 
10
+ print(f"[diag] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
11
+ print(f"[diag] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
 
 
12
 
13
+ # Make src/ importable for later (when we add KPAA back).
14
  sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
15
 
16
+ import spaces
17
+ import gradio as gr
18
+ import torch
19
 
20
+ print(f"[diag] cuda_available={torch.cuda.is_available()}", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
+ @spaces.GPU(duration=10)
24
+ def echo(text: str) -> str:
25
+ """Trivial GPU function — proves ZeroGPU is wired."""
26
+ import torch
27
+ device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ t = torch.tensor([1.0]).to(device)
29
+ return f"echo on {device}: {text} (sum={t.sum().item()})"
30
 
31
 
32
+ with gr.Blocks(title="KPAA Backend (minimal)") as demo:
33
+ gr.Markdown("# KPAA Backend — minimal ZeroGPU test")
34
+ inp = gr.Textbox(label="input", value="hello")
35
+ out = gr.Textbox(label="output")
36
+ btn = gr.Button("run on GPU")
37
+ btn.click(echo, inputs=inp, outputs=out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  if __name__ == "__main__":
41
+ demo.queue().launch(
42
+ server_name="0.0.0.0",
43
+ server_port=int(os.environ.get("PORT", "7860")),
44
+ ssr_mode=False,
45
+ show_api=False,
46
+ )