johnbridges committed on
Commit 9329d65 · 1 Parent(s): 1dc5096
Files changed (1)
  1. app.py +35 -18
app.py CHANGED
@@ -12,21 +12,28 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
 
-# ---------------- @spaces.GPU section (ZeroGPU needs this) ----------------
+# =========================
+# @spaces.GPU() SECTION
+# =========================
+# Mirrors the working Space: define a concrete GPU-decorated fn that Gradio calls.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
 
-    @spaces.GPU  # NOTE: no parentheses per HF docs; simplest reliable form
-    def gpu_ready_probe() -> str:
-        # trivial, no tensor allocations
-        return "gpu-probe-ok"
+    @spaces.GPU(duration=120)  # trivial GPU entrypoint; detector-friendly
+    def gpu_entrypoint():
+        """
+        Minimal GPU function so ZeroGPU sees a GPU endpoint.
+        Replace the body later with real CUDA work as needed.
+        """
+        return "gpu: ready"
 
 except Exception:
     ZERO_GPU_AVAILABLE = False
 
-    def gpu_ready_probe() -> str:
-        return "cpu-only"
+    def gpu_entrypoint():
+        return "gpu: not available (CPU only)"
+
 
 # ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
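A note on the decorator change above: `@spaces.GPU` accepts an optional `duration` (seconds of GPU time requested per call), and CUDA work is only valid inside the decorated function. A minimal sketch of what a real body could look like later; the `torch` usage is illustrative and not part of this commit:

import spaces   # only available on Hugging Face Spaces
import torch    # assumed installed; not used elsewhere in this commit

@spaces.GPU(duration=120)  # request up to 120s of GPU time per call
def gpu_entrypoint() -> str:
    # On ZeroGPU the device exists only while this function runs, so
    # allocate tensors here rather than at module import time.
    x = torch.ones((1024, 1024), device="cuda")
    return f"gpu: ready (device={x.device})"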
@@ -39,6 +46,7 @@ class EchoRunner(ILLMRunner):
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 
+
 # ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
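The factory remains a stub. `ILLMRunner`'s contract lives in `runners/base.py` and is not shown in this diff; purely as a hypothetical, a model-backed runner slotting into the same factory might look like this (the `run` method name and the `"runner"` key are guesses, not the project's API):

from runners.base import ILLMRunner

class StaticReplyRunner(ILLMRunner):
    """Hypothetical runner returning a canned reply; the real interface
    in runners/base.py may differ from the async run() assumed here."""

    async def run(self, prompt: str) -> str:  # assumed method name
        return "hello from StaticReplyRunner"

async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    # Hypothetical selection key; EchoRunner stays the default.
    if llmServiceObj.get("runner") == "static":
        return StaticReplyRunner()
    return EchoRunner()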
@@ -66,10 +74,11 @@ handlers = {
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
-    instance_name=settings.RABBIT_INSTANCE_NAME,
+    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
     handlers=handlers,
 )
 
+# Declarations mirror your C# InitRabbitMQObjs()
 DECLS = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
@@ -87,31 +96,37 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
 
-# ---------------- Gradio UI (smoke + GPU probe) ----------------
+
+# ---------------- Gradio UI (smoke test + GPU button) ----------------
 async def ping():
     return "ok"
 
 with gr.Blocks() as demo:
-    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
+    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener (ZeroGPU-ready)")
+
     with gr.Row():
         btn = gr.Button("Ping")
         out = gr.Textbox(label="Ping result")
     btn.click(ping, inputs=None, outputs=out)
 
-    # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
-    if ZERO_GPU_AVAILABLE:
-        probe_btn = gr.Button("GPU Probe")
-        probe_out = gr.Textbox(label="GPU Probe Result")
-        probe_btn.click(gpu_ready_probe, None, probe_out)
+    # Reference the GPU-decorated function **directly** (no lambda)
+    with gr.Row():
+        gpu_btn = gr.Button("GPU Ready Probe")
+        gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
+    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
+
 
 # ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
+    # startup
     await publisher.connect()
     await service.init()
     await listener.start(DECLS)
     yield
-    # optional: await publisher.close()
+    # shutdown (optional)
+    # await publisher.close()
+    # await listener.stop()
 
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
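On the "no lambda" comment in this hunk: ZeroGPU detects GPU usage via the `@spaces.GPU`-decorated callables that Gradio registers as event handlers, so wrapping one in a lambda hides the decoration. A standalone illustration of the pattern (the demo object here is hypothetical):

import gradio as gr
from app import gpu_entrypoint  # the decorated function above

with gr.Blocks() as probe_demo:  # hypothetical standalone demo
    gpu_btn = gr.Button("GPU Ready Probe")
    gpu_out = gr.Textbox(label="GPU Probe Result")
    # Avoid: gpu_btn.click(lambda: gpu_entrypoint(), None, gpu_out)
    # The wrapper would hide the @spaces.GPU decoration from ZeroGPU.
    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)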
@@ -120,11 +135,13 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
 
-# Extra belt & braces: expose the probe via HTTP as well
+# Also expose the probe via HTTP (extra-safe for detectors)
 @app.get("/gpu-probe")
 def gpu_probe_route():
-    return {"status": gpu_ready_probe()}
+    return {"status": gpu_entrypoint()}
+
 
 if __name__ == "__main__":
+    # For local runs; on HF Spaces, the SDK manages the server.
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
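To smoke-test the two routes locally without a broker, FastAPI's `TestClient` can help: it only runs the lifespan (and therefore the RabbitMQ connection) when entered as a context manager, so direct calls skip it. A sketch, assuming the health route is mounted at `/health`:

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)  # lifespan not started outside a `with` block
print(client.get("/health").json())     # expected: {'status': 'ok'}
print(client.get("/gpu-probe").json())  # off-Space: CPU-only fallback text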