# app.py
import asyncio
from contextlib import asynccontextmanager
import gradio as gr
from fastapi import FastAPI
from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
# =========================
# ZeroGPU probe section
# =========================
# A trivial @spaces.GPU entrypoint so ZeroGPU Spaces detect a GPU function
# at startup; falls back to a plain CPU stub when `spaces` is unavailable.
try:
    import spaces

    @spaces.GPU()  # intentionally trivial: no tensors, no allocations
    def gpu_ready_probe() -> str:
        """
        Minimal GPU-decorated function so ZeroGPU detects a GPU entrypoint.
        Wired to a Gradio button and a FastAPI route elsewhere in this file.
        """
        return "gpu-probe-ok"

    ZERO_GPU_AVAILABLE = True
except Exception:
    # Local / CPU-only environment: same signature, no decoration.
    ZERO_GPU_AVAILABLE = False

    def gpu_ready_probe() -> str:
        """Fallback probe used when the `spaces` runtime is not present."""
        return "cpu-only"
# ---------------- Runner factory (stub) ----------------
class EchoRunner(ILLMRunner):
    """Stub runner: accepts every request and does nothing.

    Placeholder implementation of the ILLMRunner interface used until a
    real model-backed runner is wired into `runner_factory`.
    """

    # Identifier reported for this runner type.
    Type = "EchoRunner"

    async def StartProcess(self, llmServiceObj: dict):  # noqa: N802
        """No-op: accept the session-start payload and return None."""

    async def RemoveProcess(self, sessionId: str):  # noqa: N802
        """No-op: nothing was started, so nothing to tear down."""

    async def StopRequest(self, sessionId: str):  # noqa: N802
        """No-op: there is no in-flight work to interrupt."""

    async def SendInputAndGetResponse(self, llmServiceObj: dict):  # noqa: N802
        """No-op: consumes the input payload and produces no response."""
async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    """Build the runner for a session payload (currently always an EchoRunner stub)."""
    runner = EchoRunner()
    return runner
# ---------------- Publisher and Service ----------------
# Outbound publisher back to the broker; external_source tags message origin.
publisher = RabbitRepo(external_source="https://space.external")
# Core service: dispatches handler calls onto runners built by runner_factory.
service = LLMService(publisher, runner_factory)
# ---------------- Handlers (.NET FuncName -> service) ----------------
# Each handler normalizes a possibly-None payload to {} before delegating.
async def h_start(data):
    """Forward a session-start request to the service."""
    await service.StartProcess(data or {})

async def h_user(data):
    """Forward user input for an existing session."""
    await service.UserInput(data or {})

async def h_remove(data):
    """Forward a session-removal request."""
    await service.RemoveSession(data or {})

async def h_stop(data):
    """Forward a stop request for in-flight work."""
    await service.StopRequest(data or {})

async def h_qir(data):
    """Forward an index-query result back into the service."""
    await service.QueryIndexResult(data or {})

async def h_getreg(_):
    """Return the full (unfiltered) function registry; payload is ignored."""
    await service.GetFunctionRegistry(False)

async def h_getreg_f(_):
    """Return the filtered function registry; payload is ignored."""
    await service.GetFunctionRegistry(True)

# Routing table: incoming FuncName -> coroutine handler.
handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}
# ---------------- Listener wiring ----------------
# Shared broker connection state used by the listener.
base = RabbitBase()
# Consumes the declared exchanges and dispatches messages via `handlers`.
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
    handlers=handlers,
)
# Declarations mirror your C# InitRabbitMQObjs()
# (FuncName, MessageTimeout-ms) pairs, in declaration order; session-level
# operations get the longer 10-minute timeout.
_DECL_SPECS = [
    ("llmStartSession", 600_000),
    ("llmUserInput", 600_000),
    ("llmRemoveSession", 60_000),
    ("llmStopRequest", 60_000),
    ("queryIndexResult", 60_000),
    ("getFunctionRegistry", 60_000),
    ("getFunctionRegistryFiltered", 60_000),
]
DECLS = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout_ms,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout_ms in _DECL_SPECS
]
# ---------------- Gradio UI (smoke test + GPU probe) ----------------
async def ping():
    """Smoke-test handler for the Gradio UI button; always reports "ok"."""
    return "ok"
# Minimal Gradio interface: a ping smoke test plus (when available) a button
# that exercises the @spaces.GPU-decorated probe so ZeroGPU sees it used.
with gr.Blocks() as demo:
    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
    with gr.Row():
        btn = gr.Button("Ping")
        out = gr.Textbox(label="Ping result")
    btn.click(ping, inputs=None, outputs=out)
    # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
    if ZERO_GPU_AVAILABLE:
        probe_btn = gr.Button("GPU Probe")
        probe_out = gr.Textbox(label="GPU Probe Result")
        probe_btn.click(gpu_ready_probe, None, probe_out)
# ---------------- FastAPI + lifespan ----------------
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Application lifespan: connect publisher, init service, start listener.

    On shutdown, best-effort cleanup releases the broker resources acquired
    at startup (the original left them open, leaking connections on restart).
    """
    # startup
    await publisher.connect()
    await service.init()
    await listener.start(DECLS)
    try:
        yield
    finally:
        # shutdown: close in reverse order of acquisition. Cleanup methods
        # may not exist on these project classes — TODO confirm their names —
        # so look them up defensively and never let teardown raise.
        from contextlib import suppress
        for obj, method_name in ((listener, "stop"), (publisher, "close")):
            closer = getattr(obj, method_name, None)
            if closer is not None:
                with suppress(Exception):
                    await closer()
# Build the FastAPI app with the RabbitMQ lifespan hooks attached.
app = FastAPI(lifespan=lifespan)
# Mount the Gradio UI at the web root; `app` now serves both UI and API routes.
app = gr.mount_gradio_app(app, demo, path="/")
@app.get("/health")
async def health():
    """Liveness endpoint for orchestrators and uptime monitors."""
    payload = {"status": "ok"}
    return payload
# Also expose probe via HTTP (belt & braces for ZeroGPU detectors)
@app.get("/gpu-probe")
def gpu_probe_route():
    """Report the GPU readiness probe's result over HTTP."""
    probe_result = gpu_ready_probe()
    return {"status": probe_result}
if __name__ == "__main__":
    # Local entry point: serve on 7860, the standard Hugging Face Spaces port.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)