johnbridges committed
Commit 1dc5096 · Parent(s): 514fb81

Files changed (1): app.py (+11 -39)
app.py CHANGED
```diff
@@ -12,52 +12,33 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
 
-
-# =========================
-# @spaces.GPU() SECTION
-# =========================
-# This trivial GPU endpoint keeps ZeroGPU Spaces alive at startup.
+# ---------------- @spaces.GPU section (ZeroGPU needs this) ----------------
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
 
-    @spaces.GPU()  # keep it trivial (no tensor allocations)
+    @spaces.GPU  # NOTE: no parentheses per HF docs; simplest reliable form
     def gpu_ready_probe() -> str:
-        """
-        Minimal GPU-decorated function so ZeroGPU detects a GPU entrypoint.
-        It's also referenced by a Gradio button and a FastAPI route below.
-        """
+        # trivial, no tensor allocations
         return "gpu-probe-ok"
 
 except Exception:
     ZERO_GPU_AVAILABLE = False
 
-    # Fallback for local/CPU-only runs (same signature)
     def gpu_ready_probe() -> str:
         return "cpu-only"
 
-
 # ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
-
-    async def StartProcess(self, llmServiceObj: dict):  # noqa: N802
-        pass
-
-    async def RemoveProcess(self, sessionId: str):  # noqa: N802
-        pass
-
-    async def StopRequest(self, sessionId: str):  # noqa: N802
-        pass
-
-    async def SendInputAndGetResponse(self, llmServiceObj: dict):  # noqa: N802
-        pass
-
+    async def StartProcess(self, llmServiceObj: dict): pass
+    async def RemoveProcess(self, sessionId: str): pass
+    async def StopRequest(self, sessionId: str): pass
+    async def SendInputAndGetResponse(self, llmServiceObj: dict): pass
 
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 
-
 # ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
```
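Worth noting on the decorator change: the Spaces docs accept both the bare `@spaces.GPU` form used here and the called form, which is needed when passing arguments such as a runtime budget. A minimal illustration (the function names are made up):

```python
import spaces

@spaces.GPU                 # bare form, as in this commit
def probe_bare() -> str:
    return "ok"

@spaces.GPU(duration=60)    # called form; duration caps GPU time in seconds
def probe_with_budget() -> str:
    return "ok"
```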
```diff
@@ -85,11 +66,10 @@ handlers = {
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
-    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
+    instance_name=settings.RABBIT_INSTANCE_NAME,
     handlers=handlers,
 )
 
-# Declarations mirror your C# InitRabbitMQObjs()
 DECLS = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
```
```diff
@@ -107,12 +87,10 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
 
-
-# ---------------- Gradio UI (smoke test + GPU probe) ----------------
+# ---------------- Gradio UI (smoke + GPU probe) ----------------
 async def ping():
     return "ok"
 
-
 with gr.Blocks() as demo:
     gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
     with gr.Row():
```
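Each DECLS entry has the shape `{ExchangeName, FuncName, MessageTimeout, RoutingKeys}`. The listener internals are not part of this diff, so the following is only a guess at how one entry might be expanded, sketched with aio-pika; the exchange type, queue-naming scheme, and TTL mapping are all assumptions:

```python
# Hypothetical sketch of consuming one DECLS entry (not the real
# RabbitListenerBase code, which this diff does not show).
import aio_pika

async def declare_from_entry(channel: aio_pika.abc.AbstractChannel,
                             decl: dict, instance_name: str) -> None:
    exchange = await channel.declare_exchange(
        decl["ExchangeName"], aio_pika.ExchangeType.FANOUT, durable=True,
    )
    queue = await channel.declare_queue(
        f"{instance_name}_{decl['FuncName']}",  # prefix scheme is a guess
        durable=True,
        arguments={"x-message-ttl": decl["MessageTimeout"]},  # TTL use is a guess
    )
    for key in decl["RoutingKeys"]:
        await queue.bind(exchange, routing_key=key)
```

The removed `# queue prefix like your .NET instance` comment is what motivates the `instance_name` prefix here.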
```diff
@@ -126,19 +104,14 @@ with gr.Blocks() as demo:
         probe_out = gr.Textbox(label="GPU Probe Result")
         probe_btn.click(gpu_ready_probe, None, probe_out)
 
-
 # ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
-    # startup
     await publisher.connect()
     await service.init()
     await listener.start(DECLS)
     yield
-    # shutdown (optional)
-    # await publisher.close()
-    # await listener.stop()
-
+    # optional: await publisher.close()
 
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
```
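The shutdown half of the lifespan is now just a comment. If clean teardown ever matters (releasing the broker connection on redeploy, for instance), the usual pattern is a try/finally around the yield; the `stop()`/`close()` names below come from the commented-out lines and are not verified against the actual classes:

```python
@asynccontextmanager
async def lifespan(_app: FastAPI):
    await publisher.connect()
    await service.init()
    await listener.start(DECLS)
    try:
        yield
    finally:
        await listener.stop()    # name taken from the removed comment
        await publisher.close()  # name taken from the removed comment
```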
```diff
@@ -147,12 +120,11 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
 
-# Also expose probe via HTTP (belt & braces for ZeroGPU detectors)
+# Extra belt & braces: expose the probe via HTTP as well
 @app.get("/gpu-probe")
 def gpu_probe_route():
     return {"status": gpu_ready_probe()}
 
-
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
```
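As a quick smoke test of the merged result, the two plain HTTP routes can be exercised with FastAPI's test client; lifespan only runs when the client is entered as a context manager, so no RabbitMQ broker is needed for this check (the module name `app` is assumed):

```python
from fastapi.testclient import TestClient

from app import app  # assumes the file above is saved as app.py

client = TestClient(app)  # lifespan not entered, so publisher.connect() is skipped
print(client.get("/health").json())     # {'status': 'ok'}
print(client.get("/gpu-probe").json())  # {'status': 'cpu-only'} off-Space
```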
 