johnbridges committed on
Commit
514fb81
·
1 Parent(s): 8280e1d
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -13,20 +13,27 @@ from service import LLMService
13
  from runners.base import ILLMRunner
14
 
15
 
16
- # ---------------- ZeroGPU probe ----------------
17
- # Keep the Space alive on ZeroGPU until real GPU inference is added.
 
 
18
  try:
19
  import spaces
20
  ZERO_GPU_AVAILABLE = True
21
 
22
- @spaces.GPU() # trivial, no tensor allocations
23
  def gpu_ready_probe() -> str:
 
 
 
 
24
  return "gpu-probe-ok"
25
 
26
  except Exception:
27
  ZERO_GPU_AVAILABLE = False
28
 
29
- def gpu_ready_probe() -> str: # fallback for local/CPU runs
 
30
  return "cpu-only"
31
 
32
 
@@ -113,11 +120,11 @@ with gr.Blocks() as demo:
113
  out = gr.Textbox(label="Ping result")
114
  btn.click(ping, inputs=None, outputs=out)
115
 
116
- # Reference the GPU probe so ZeroGPU detection never misses it.
117
  if ZERO_GPU_AVAILABLE:
118
  probe_btn = gr.Button("GPU Probe")
119
  probe_out = gr.Textbox(label="GPU Probe Result")
120
- probe_btn.click(lambda: gpu_ready_probe(), None, probe_out)
121
 
122
 
123
  # ---------------- FastAPI + lifespan ----------------
@@ -128,7 +135,7 @@ async def lifespan(_app: FastAPI):
128
  await service.init()
129
  await listener.start(DECLS)
130
  yield
131
- # shutdown (optional: close AMQP if you implement it)
132
  # await publisher.close()
133
  # await listener.stop()
134
 
@@ -140,7 +147,7 @@ app = gr.mount_gradio_app(app, demo, path="/")
140
  async def health():
141
  return {"status": "ok"}
142
 
143
- # Extra: also expose the probe via HTTP (belt & braces for ZeroGPU)
144
  @app.get("/gpu-probe")
145
  def gpu_probe_route():
146
  return {"status": gpu_ready_probe()}
 
13
  from runners.base import ILLMRunner
14
 
15
 
16
+ # =========================
17
+ # @spaces.GPU() SECTION
18
+ # =========================
19
+ # This trivial GPU endpoint keeps ZeroGPU Spaces alive at startup.
20
  try:
21
  import spaces
22
  ZERO_GPU_AVAILABLE = True
23
 
24
+ @spaces.GPU() # keep it trivial (no tensor allocations)
25
  def gpu_ready_probe() -> str:
26
+ """
27
+ Minimal GPU-decorated function so ZeroGPU detects a GPU entrypoint.
28
+ It's also referenced by a Gradio button and a FastAPI route below.
29
+ """
30
  return "gpu-probe-ok"
31
 
32
  except Exception:
33
  ZERO_GPU_AVAILABLE = False
34
 
35
+ # Fallback for local/CPU-only runs (same signature)
36
+ def gpu_ready_probe() -> str:
37
  return "cpu-only"
38
 
39
 
 
120
  out = gr.Textbox(label="Ping result")
121
  btn.click(ping, inputs=None, outputs=out)
122
 
123
+ # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
124
  if ZERO_GPU_AVAILABLE:
125
  probe_btn = gr.Button("GPU Probe")
126
  probe_out = gr.Textbox(label="GPU Probe Result")
127
+ probe_btn.click(gpu_ready_probe, None, probe_out)
128
 
129
 
130
  # ---------------- FastAPI + lifespan ----------------
 
135
  await service.init()
136
  await listener.start(DECLS)
137
  yield
138
+ # shutdown (optional)
139
  # await publisher.close()
140
  # await listener.stop()
141
 
 
147
  async def health():
148
  return {"status": "ok"}
149
 
150
+ # Also expose probe via HTTP (belt & braces for ZeroGPU detectors)
151
  @app.get("/gpu-probe")
152
  def gpu_probe_route():
153
  return {"status": gpu_ready_probe()}