scvcoder committed on
Commit
617c061
·
verified ·
1 Parent(s): 1a36a19

Hybrid: demo.launch() (ZeroGPU works) + post-launch attach KPAA /v1 routes to demo.app

Browse files
Files changed (1) hide show
  1. app.py +104 -18
app.py CHANGED
@@ -1,46 +1,132 @@
1
- """KPAA Backend Space — MINIMAL TEST.
2
 
3
- Goal: verify HF Spaces ZeroGPU activates with the absolute simplest pattern
4
- (Gradio Blocks + @spaces.GPU + demo.launch). Once this works, build KPAA on top.
 
 
 
 
 
 
 
 
 
5
  """
6
  import os
7
  import sys
 
8
  from pathlib import Path
9
 
10
- print(f"[diag] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
11
- print(f"[diag] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
12
 
13
- # Make src/ importable for later (when we add KPAA back).
14
  sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  import spaces
17
  import gradio as gr
18
- import torch
19
-
20
- print(f"[diag] cuda_available={torch.cuda.is_available()}", flush=True)
21
 
22
 
 
 
 
 
23
  @spaces.GPU(duration=10)
24
  def echo(text: str) -> str:
25
- """Trivial GPU function — proves ZeroGPU is wired."""
26
  import torch
27
  device = "cuda" if torch.cuda.is_available() else "cpu"
28
- t = torch.tensor([1.0]).to(device)
29
- return f"echo on {device}: {text} (sum={t.sum().item()})"
 
 
 
 
 
30
 
 
31
 
32
- with gr.Blocks(title="KPAA Backend (minimal)") as demo:
33
- gr.Markdown("# KPAA Backend — minimal ZeroGPU test")
34
- inp = gr.Textbox(label="input", value="hello")
35
- out = gr.Textbox(label="output")
36
- btn = gr.Button("run on GPU")
 
 
 
 
 
 
 
 
 
 
37
  btn.click(echo, inputs=inp, outputs=out)
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  if __name__ == "__main__":
41
- demo.queue().launch(
 
 
42
  server_name="0.0.0.0",
43
  server_port=int(os.environ.get("PORT", "7860")),
44
  ssr_mode=False,
45
  show_api=False,
 
46
  )
 
 
 
 
 
 
 
 
 
1
+ """KPAA Backend Space — Gradio + ZeroGPU + KPAA OpenAI-compatible API.
2
 
3
+ Strategy validated via minimal test:
4
+ - demo.launch() (Gradio's own uvicorn) is the path that activates ZeroGPU.
5
+ - mount_gradio_app + manual uvicorn does NOT activate ZeroGPU.
6
+
7
+ So we use demo.launch(), and AFTER launch we attach KPAA's /v1 routes to
8
+ the underlying FastAPI (demo.app) by appending them to demo.app.routes. Routes added at
9
+ runtime are picked up because Starlette dispatches by traversing app.routes
10
+ on each request.
11
+
12
+ Hardware: ZeroGPU (zero-a10g).
13
+ Required secret: LAW_OC.
14
  """
15
  import os
16
  import sys
17
+ import time
18
  from pathlib import Path
19
 
20
+ print(f"[kpaa-backend] SPACES_ZERO_GPU={os.environ.get('SPACES_ZERO_GPU')!r}", flush=True)
21
+ print(f"[kpaa-backend] SPACE_ID={os.environ.get('SPACE_ID')!r}", flush=True)
22
 
23
+ # HF Spaces: src/ on sys.path
24
  sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
25
 
26
+
27
+ # ─── monkey-patch: gradio_client `/api_info` schema bug ────────────────────
28
+ import gradio_client.utils as _gc_utils
29
+
30
+ _orig_get_type = _gc_utils.get_type
31
+ _orig_jstpt = _gc_utils._json_schema_to_python_type
32
+
33
+
def _safe_get_type(schema):
    """Return the type name for *schema*, tolerating non-dict schemas.

    Dict schemas are delegated to the saved original ``get_type``; any other
    value yields an empty string instead of being passed through.
    """
    if isinstance(schema, dict):
        return _orig_get_type(schema)
    return ""
38
+
39
+
def _safe_jstpt(schema, defs):
    """Convert a JSON schema to a Python type string, tolerating non-dict schemas.

    Dict schemas are delegated to the saved original
    ``_json_schema_to_python_type``; any other value is reported as ``"Any"``.
    """
    return _orig_jstpt(schema, defs) if isinstance(schema, dict) else "Any"
44
+
45
+
46
+ _gc_utils.get_type = _safe_get_type
47
+ _gc_utils._json_schema_to_python_type = _safe_jstpt
48
+ # ──────────────────────────────────────────────────────────────────────────
49
+
50
+
51
  import spaces
52
  import gradio as gr
 
 
 
53
 
54
 
55
+ # ─── ZeroGPU canary wired to a Gradio event ───────────────────────────────
56
+ # Critical insight: HF detector requires @spaces.GPU functions to be wired
57
+ # to Gradio components, not standalone. So we keep `echo` as a real button
58
+ # handler in the status UI.
@spaces.GPU(duration=10)
def echo(text: str) -> str:
    """Echo *text* back, tagged with the device torch reports as usable."""
    # torch is imported inside the handler rather than at module import time.
    import torch

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    return f"GPU echo ({device}): {text}"
64
+
65
+
66
+ with gr.Blocks(title="KPAA Backend") as demo:
67
+ gr.Markdown(
68
+ """
69
+ # 🧠 KPAA Backend
70
 
71
+ 한국 개인정보보호법 RAG 추론 백엔드.
72
 
73
+ ## API
74
+ - `POST /v1/chat/completions`
75
+ - `GET /v1/models`
76
+ - `GET /healthz`
77
+
78
+ UI 는 [`scvcoder/korean-privacy-ai-assistant`](https://huggingface.co/spaces/scvcoder/korean-privacy-ai-assistant) 에서 제공.
79
+
80
+ ---
81
+ ### GPU 진단
82
+ """
83
+ )
84
+ with gr.Row():
85
+ inp = gr.Textbox(label="입력", value="hello", scale=3)
86
+ out = gr.Textbox(label="출력 (GPU 검증)", scale=3)
87
+ btn = gr.Button("GPU echo 테스트")
88
  btn.click(echo, inputs=inp, outputs=out)
89
 
90
 
def _attach_kpaa_routes() -> None:
    """Attach KPAA's OpenAI-compatible routes to the live Gradio FastAPI app.

    Must be called AFTER ``demo.launch()``, once ``demo.app`` exists. The route
    objects of the app built by ``kpaa.server.create_app()`` are appended
    directly to ``demo.app.routes``; ``include_router`` is NOT used.

    Routes that have no ``path`` attribute, or that are rooted at ``/``, are
    skipped so they cannot collide with Gradio's own root endpoint. A root
    mount is also skipped: Starlette stores a ``Mount("/")`` with the path
    ``""``, and appending it would turn it into a catch-all.

    NOTE(review): only route objects are carried over. Middleware, exception
    handlers and startup/lifespan hooks registered on the KPAA app are
    presumably not applied to ``demo.app`` — confirm ``create_app()`` does not
    rely on them.
    """
    from kpaa.server import create_app

    kpaa_app = create_app()

    # Paths that would conflict with (or swallow) Gradio's root route.
    root_paths = ("/", "", None)

    candidates = list(kpaa_app.routes)
    attachable = [
        route for route in candidates
        if getattr(route, "path", None) not in root_paths
    ]
    skipped = len(candidates) - len(attachable)

    # Appended at the end, so existing Gradio routes keep matching priority.
    demo.app.routes.extend(attachable)
    n_added = len(attachable)
    print(f"[kpaa-backend] attached {n_added} KPAA routes (skipped {skipped})", flush=True)
113
+
114
+
if __name__ == "__main__":
    # prevent_thread_lock=True makes launch() return instead of blocking, so
    # demo.app can be patched afterwards.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        ssr_mode=False,
        show_api=False,
        prevent_thread_lock=True,
    )

    # demo.app now exists — attach the KPAA routes. Requests to /v1 that arrive
    # before this call returns are not yet routable.
    _attach_kpaa_routes()
    print("[kpaa-backend] ready: Gradio at /, KPAA OpenAI routes at /v1/...", flush=True)

    # Keep the main thread alive. block_thread() is Gradio's own keep-alive
    # loop and also closes the server when the process is interrupted, which
    # the previous bare sleep loop did not do.
    demo.block_thread()