Spaces:

Mungert
/

GradLLM

Running

App Files Community

johnbridges committed on Aug 13, 2025

Commit

514fb81

1 Parent(s): 8280e1d

.

Browse files

Files changed (1) hide show

app.py +15 -8

app.py CHANGED Viewed

@@ -13,20 +13,27 @@ from service import LLMService
 from runners.base import ILLMRunner
-# ---------------- ZeroGPU probe ----------------
-# Keep the Space alive on ZeroGPU until real GPU inference is added.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
-    @spaces.GPU()  # trivial, no tensor allocations
     def gpu_ready_probe() -> str:
         return "gpu-probe-ok"
 except Exception:
     ZERO_GPU_AVAILABLE = False
-    def gpu_ready_probe() -> str:  # fallback for local/CPU runs
         return "cpu-only"
@@ -113,11 +120,11 @@ with gr.Blocks() as demo:
         out = gr.Textbox(label="Ping result")
     btn.click(ping, inputs=None, outputs=out)
-    # Reference the GPU probe so ZeroGPU detection never misses it.
     if ZERO_GPU_AVAILABLE:
         probe_btn = gr.Button("GPU Probe")
         probe_out = gr.Textbox(label="GPU Probe Result")
-        probe_btn.click(lambda: gpu_ready_probe(), None, probe_out)
 # ---------------- FastAPI + lifespan ----------------
@@ -128,7 +135,7 @@ async def lifespan(_app: FastAPI):
     await service.init()
     await listener.start(DECLS)
     yield
-    # shutdown (optional: close AMQP if you implement it)
     # await publisher.close()
     # await listener.stop()
@@ -140,7 +147,7 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
-# Extra: also expose the probe via HTTP (belt & braces for ZeroGPU)
 @app.get("/gpu-probe")
 def gpu_probe_route():
     return {"status": gpu_ready_probe()}

 from runners.base import ILLMRunner
+# =========================
+# @spaces.GPU() SECTION
+# =========================
+# This trivial GPU endpoint keeps ZeroGPU Spaces alive at startup.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
+    @spaces.GPU()  # keep it trivial (no tensor allocations)
     def gpu_ready_probe() -> str:
+        """
+        Minimal GPU-decorated function so ZeroGPU detects a GPU entrypoint.
+        It's also referenced by a Gradio button and a FastAPI route below.
+        """
         return "gpu-probe-ok"
 except Exception:
     ZERO_GPU_AVAILABLE = False
+    # Fallback for local/CPU-only runs (same signature)
+    def gpu_ready_probe() -> str:
         return "cpu-only"
         out = gr.Textbox(label="Ping result")
     btn.click(ping, inputs=None, outputs=out)
+    # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
     if ZERO_GPU_AVAILABLE:
         probe_btn = gr.Button("GPU Probe")
         probe_out = gr.Textbox(label="GPU Probe Result")
+        probe_btn.click(gpu_ready_probe, None, probe_out)
 # ---------------- FastAPI + lifespan ----------------
     await service.init()
     await listener.start(DECLS)
     yield
+    # shutdown (optional)
     # await publisher.close()
     # await listener.stop()
 async def health():
     return {"status": "ok"}
+# Also expose probe via HTTP (belt & braces for ZeroGPU detectors)
 @app.get("/gpu-probe")
 def gpu_probe_route():
     return {"status": gpu_ready_probe()}