# app.py
import asyncio
import os
from contextlib import asynccontextmanager

import gradio as gr
import spaces
from fastapi import FastAPI

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner

# Whether this Space runs on ZeroGPU hardware.
# Assumption: detected via the SPACES_ZERO_GPU env var; adjust if your Space signals this differently.
ZERO_GPU_AVAILABLE = bool(os.environ.get("SPACES_ZERO_GPU"))


@spaces.GPU  # NOTE: no parentheses per HF docs; simplest reliable form
def gpu_ready_probe() -> str:
    # trivial, no tensor allocations
    return "gpu-probe-ok"


# ---------------- Runner factory (stub) ----------------
class EchoRunner(ILLMRunner):
    Type = "EchoRunner"

    async def StartProcess(self, llmServiceObj: dict):
        pass

    async def RemoveProcess(self, sessionId: str):
        pass

    async def StopRequest(self, sessionId: str):
        pass

    async def SendInputAndGetResponse(self, llmServiceObj: dict):
        pass


async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    return EchoRunner()


# ---------------- Publisher and Service ----------------
publisher = RabbitRepo(external_source="https://space.external")
service = LLMService(publisher, runner_factory)


# ---------------- Handlers (.NET FuncName -> service) ----------------
async def h_start(data):
    await service.StartProcess(data or {})


async def h_user(data):
    await service.UserInput(data or {})


async def h_remove(data):
    await service.RemoveSession(data or {})


async def h_stop(data):
    await service.StopRequest(data or {})


async def h_qir(data):
    await service.QueryIndexResult(data or {})


async def h_getreg(_):
    await service.GetFunctionRegistry(False)


async def h_getreg_f(_):
    await service.GetFunctionRegistry(True)


handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}

# ---------------- Listener wiring ----------------
base = RabbitBase()
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,
    handlers=handlers,
)

# One exchange per FuncName, suffixed with the service id and bound to the configured routing key.
DECLS = [
    {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"llmUserInput{settings.SERVICE_ID}", "FuncName": "llmUserInput",
     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"llmRemoveSession{settings.SERVICE_ID}", "FuncName": "llmRemoveSession",
     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"llmStopRequest{settings.SERVICE_ID}", "FuncName": "llmStopRequest",
     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"queryIndexResult{settings.SERVICE_ID}", "FuncName": "queryIndexResult",
     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"getFunctionRegistry{settings.SERVICE_ID}", "FuncName": "getFunctionRegistry",
     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"getFunctionRegistryFiltered{settings.SERVICE_ID}", "FuncName": "getFunctionRegistryFiltered",
     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
]


# ---------------- Gradio UI (smoke + GPU probe) ----------------
async def ping():
    return "ok"


with gr.Blocks() as demo:
    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
    with gr.Row():
        btn = gr.Button("Ping")
        out = gr.Textbox(label="Ping result")
    btn.click(ping, inputs=None, outputs=out)

    # IMPORTANT: reference the decorated function DIRECTLY (no lambda)
    if ZERO_GPU_AVAILABLE:
        probe_btn = gr.Button("GPU Probe")
        probe_out = gr.Textbox(label="GPU Probe Result")
        probe_btn.click(gpu_ready_probe, None, probe_out)


# ---------------- FastAPI + lifespan ----------------
@asynccontextmanager
async def lifespan(_app: FastAPI):
    await publisher.connect()
    await service.init()
    await listener.start(DECLS)
    yield
    # optional: await publisher.close()


app = FastAPI(lifespan=lifespan)
app = gr.mount_gradio_app(app, demo, path="/")


@app.get("/health")
async def health():
    return {"status": "ok"}


# Extra belt & braces: expose the probe via HTTP as well
@app.get("/gpu-probe")
def gpu_probe_route():
    return {"status": gpu_ready_probe()}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)