Spaces:

Mungert
/

GradLLM

Sleeping

App Files Files Community

johnbridges committed on Aug 13, 2025

Commit

1dc5096

1 Parent(s): 514fb81

.

Browse files

Files changed (1) hide show

app.py +11 -39

app.py CHANGED Viewed

@@ -12,52 +12,33 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
-# =========================
-# @spaces.GPU() SECTION
-# =========================
-# This trivial GPU endpoint keeps ZeroGPU Spaces alive at startup.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
-    @spaces.GPU()  # keep it trivial (no tensor allocations)
     def gpu_ready_probe() -> str:
-        """
-        Minimal GPU-decorated function so ZeroGPU detects a GPU entrypoint.
-        It's also referenced by a Gradio button and a FastAPI route below.
-        """
         return "gpu-probe-ok"
 except Exception:
     ZERO_GPU_AVAILABLE = False
-    # Fallback for local/CPU-only runs (same signature)
     def gpu_ready_probe() -> str:
         return "cpu-only"
 # ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
-    async def StartProcess(self, llmServiceObj: dict):  # noqa: N802
-        pass
-    async def RemoveProcess(self, sessionId: str):  # noqa: N802
-        pass
-    async def StopRequest(self, sessionId: str):  # noqa: N802
-        pass
-    async def SendInputAndGetResponse(self, llmServiceObj: dict):  # noqa: N802
-        pass
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 # ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
@@ -85,11 +66,10 @@ handlers = {
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
-    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
     handlers=handlers,
 )
-# Declarations mirror your C# InitRabbitMQObjs()
 DECLS = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
@@ -107,12 +87,10 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
-# ---------------- Gradio UI (smoke test + GPU probe) ----------------
 async def ping():
     return "ok"
 with gr.Blocks() as demo:
     gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
     with gr.Row():
@@ -126,19 +104,14 @@ with gr.Blocks() as demo:
         probe_out = gr.Textbox(label="GPU Probe Result")
         probe_btn.click(gpu_ready_probe, None, probe_out)
 # ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
-    # startup
     await publisher.connect()
     await service.init()
     await listener.start(DECLS)
     yield
-    # shutdown (optional)
-    # await publisher.close()
-    # await listener.stop()
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
@@ -147,12 +120,11 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
-# Also expose probe via HTTP (belt & braces for ZeroGPU detectors)
 @app.get("/gpu-probe")
 def gpu_probe_route():
     return {"status": gpu_ready_probe()}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from service import LLMService
 from runners.base import ILLMRunner
+# ---------------- @spaces.GPU section (ZeroGPU needs this) ----------------
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
+    @spaces.GPU   # NOTE: no parentheses per HF docs; simplest reliable form
     def gpu_ready_probe() -> str:
+        # trivial, no tensor allocations
         return "gpu-probe-ok"
 except Exception:
     ZERO_GPU_AVAILABLE = False
     def gpu_ready_probe() -> str:
         return "cpu-only"
 # ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
+    async def StartProcess(self, llmServiceObj: dict): pass
+    async def RemoveProcess(self, sessionId: str): pass
+    async def StopRequest(self, sessionId: str): pass
+    async def SendInputAndGetResponse(self, llmServiceObj: dict): pass
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 # ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
+    instance_name=settings.RABBIT_INSTANCE_NAME,
     handlers=handlers,
 )
 DECLS = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
+# ---------------- Gradio UI (smoke + GPU probe) ----------------
 async def ping():
     return "ok"
 with gr.Blocks() as demo:
     gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
     with gr.Row():
         probe_out = gr.Textbox(label="GPU Probe Result")
         probe_btn.click(gpu_ready_probe, None, probe_out)
 # ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
     await publisher.connect()
     await service.init()
     await listener.start(DECLS)
     yield
+    # optional: await publisher.close()
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
+# Extra belt & braces: expose the probe via HTTP as well
 @app.get("/gpu-probe")
 def gpu_probe_route():
     return {"status": gpu_ready_probe()}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)