johnbridges committed on
Commit 9329d65 · 1 Parent(s): 1dc5096
Files changed (1)
  1. app.py +35 -18
app.py CHANGED
@@ -12,21 +12,28 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
 
-# ---------------- @spaces.GPU section (ZeroGPU needs this) ----------------
+# =========================
+# @spaces.GPU() SECTION
+# =========================
+# Mirrors the working Space: define a concrete GPU-decorated fn that Gradio calls.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
 
-    @spaces.GPU  # NOTE: no parentheses per HF docs; simplest reliable form
-    def gpu_ready_probe() -> str:
-        # trivial, no tensor allocations
-        return "gpu-probe-ok"
+    @spaces.GPU(duration=120)  # trivial GPU entrypoint; detector-friendly
+    def gpu_entrypoint():
+        """
+        Minimal GPU function so ZeroGPU sees a GPU endpoint.
+        Replace the body later with real CUDA work as needed.
+        """
+        return "gpu: ready"
 
 except Exception:
     ZERO_GPU_AVAILABLE = False
 
-    def gpu_ready_probe() -> str:
-        return "cpu-only"
+    def gpu_entrypoint():
+        return "gpu: not available (CPU only)"
+
 
 # ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
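A note on the decorator change above: `@spaces.GPU` accepts an optional `duration` (seconds of GPU time requested per call), and CUDA work is only valid inside the decorated function. A minimal sketch of what a real body could look like later; the `torch` usage is illustrative and not part of this commit:

import spaces   # only available on Hugging Face Spaces
import torch    # assumed installed; not used elsewhere in this commit

@spaces.GPU(duration=120)  # request up to 120s of GPU time per call
def gpu_entrypoint() -> str:
    # On ZeroGPU the device exists only while this function runs, so
    # allocate tensors here rather than at module import time.
    x = torch.ones((1024, 1024), device="cuda")
    return f"gpu: ready (device={x.device})"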
@@ -39,6 +46,7 @@ class EchoRunner(ILLMRunner):
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 
+
 # ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
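The factory remains a stub. `ILLMRunner`'s contract lives in `runners/base.py` and is not shown in this diff; purely as a hypothetical, a model-backed runner slotting into the same factory might look like this (the `run` method name and the `"runner"` key are guesses, not the project's API):

from runners.base import ILLMRunner

class StaticReplyRunner(ILLMRunner):
    """Hypothetical runner returning a canned reply; the real interface
    in runners/base.py may differ from the async run() assumed here."""

    async def run(self, prompt: str) -> str:  # assumed method name
        return "hello from StaticReplyRunner"

async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    # Hypothetical selection key; EchoRunner stays the default.
    if llmServiceObj.get("runner") == "static":
        return StaticReplyRunner()
    return EchoRunner()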
@@ -66,10 +74,11 @@ handlers = {
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
-    instance_name=settings.RABBIT_INSTANCE_NAME,
+    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
     handlers=handlers,
 )
 
+# Declarations mirror your C# InitRabbitMQObjs()
 DECLS = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
@@ -87,31 +96,37 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
 
-# ---------------- Gradio UI (smoke + GPU probe) ----------------
+
+# ---------------- Gradio UI (smoke test + GPU button) ----------------
 async def ping():
     return "ok"
 
 with gr.Blocks() as demo:
-    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
+    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener (ZeroGPU-ready)")
+
     with gr.Row():
         btn = gr.Button("Ping")
         out = gr.Textbox(label="Ping result")
     btn.click(ping, inputs=None, outputs=out)
 
-    # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
-    if ZERO_GPU_AVAILABLE:
-        probe_btn = gr.Button("GPU Probe")
-        probe_out = gr.Textbox(label="GPU Probe Result")
-        probe_btn.click(gpu_ready_probe, None, probe_out)
+    # Reference the GPU-decorated function **directly** (no lambda)
+    with gr.Row():
+        gpu_btn = gr.Button("GPU Ready Probe")
+        gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
+    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
+
 
 # ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
+    # startup
     await publisher.connect()
     await service.init()
     await listener.start(DECLS)
     yield
-    # optional: await publisher.close()
+    # shutdown (optional)
+    # await publisher.close()
+    # await listener.stop()
 
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
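On the "no lambda" comment in this hunk: ZeroGPU detects GPU usage via the `@spaces.GPU`-decorated callables that Gradio registers as event handlers, so wrapping one in a lambda hides the decoration. A standalone illustration of the pattern (the demo object here is hypothetical):

import gradio as gr
from app import gpu_entrypoint  # the decorated function above

with gr.Blocks() as probe_demo:  # hypothetical standalone demo
    gpu_btn = gr.Button("GPU Ready Probe")
    gpu_out = gr.Textbox(label="GPU Probe Result")
    # Avoid: gpu_btn.click(lambda: gpu_entrypoint(), None, gpu_out)
    # The wrapper would hide the @spaces.GPU decoration from ZeroGPU.
    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)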
@@ -120,11 +135,13 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
 
-# Extra belt & braces: expose the probe via HTTP as well
+# Also expose the probe via HTTP (extra-safe for detectors)
 @app.get("/gpu-probe")
 def gpu_probe_route():
-    return {"status": gpu_ready_probe()}
+    return {"status": gpu_entrypoint()}
+
 
 if __name__ == "__main__":
+    # For local runs; on HF Spaces, the SDK manages the server.
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
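To smoke-test the two routes locally without a broker, FastAPI's `TestClient` can help: it only runs the lifespan (and therefore the RabbitMQ connection) when entered as a context manager, so direct calls skip it. A sketch, assuming the health route is mounted at `/health`:

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)  # lifespan not started outside a `with` block
print(client.get("/health").json())     # expected: {'status': 'ok'}
print(client.get("/gpu-probe").json())  # off-Space: CPU-only fallback text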