# app.py
"""RabbitMQ-driven LLM runner service for a Hugging Face Space.

Wires a RabbitMQ listener (mirroring the .NET ``InitRabbitMQObjs()`` layout)
to an ``LLMService``, mounts a minimal Gradio UI for smoke testing, and
exposes the whole thing through FastAPI with a lifespan-managed startup.
"""
import asyncio
import gradio as gr
from fastapi import FastAPI
from contextlib import asynccontextmanager

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner

# --- Optional ZeroGPU hook ---
# If your Space uses ZeroGPU hardware, this satisfies the startup check.
# If you're on CPU hardware, this is harmless.
try:
    import spaces
    ZERO_GPU_AVAILABLE = True
except Exception:
    spaces = None
    ZERO_GPU_AVAILABLE = False


# --- Runner factory (stub) ---
class EchoRunner(ILLMRunner):
    """No-op runner placeholder until a real LLM runner is wired in.

    Every ILLMRunner hook is implemented as an empty coroutine so the
    service layer can be exercised end-to-end without a model.
    """

    Type = "EchoRunner"

    async def StartProcess(self, llmServiceObj: dict):
        pass

    async def RemoveProcess(self, sessionId: str):
        pass

    async def StopRequest(self, sessionId: str):
        pass

    async def SendInputAndGetResponse(self, llmServiceObj: dict):
        pass


async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    """Return a fresh runner for a session (currently always an EchoRunner)."""
    return EchoRunner()


# --- Publisher and Service ---
publisher = RabbitRepo(external_source="https://space.external")
service = LLMService(publisher, runner_factory)


# --- Handlers mapping .NET FuncName -> service method ---
# Each handler normalizes a missing/None payload to {} before delegating.
async def h_start(data):
    await service.StartProcess(data or {})

async def h_user(data):
    await service.UserInput(data or {})

async def h_remove(data):
    await service.RemoveSession(data or {})

async def h_stop(data):
    await service.StopRequest(data or {})

async def h_qir(data):
    await service.QueryIndexResult(data or {})

async def h_getreg(_):
    await service.GetFunctionRegistry(False)

async def h_getreg_f(_):
    await service.GetFunctionRegistry(True)

handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}


# --- Listener wiring (needs base + instance_name) ---
base = RabbitBase()
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
    handlers=handlers,
)


def _decl(func_name: str, timeout_ms: int) -> dict:
    """Build one exchange declaration in the shape used by the C# InitRabbitMQObjs().

    The exchange name is always FuncName + SERVICE_ID; centralizing that here
    avoids the copy/paste typo risk of seven hand-written dicts.
    """
    return {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout_ms,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }


# Declarations mirror your C# InitRabbitMQObjs()
DECLS = [
    _decl("llmStartSession", 600_000),
    _decl("llmUserInput", 600_000),
    _decl("llmRemoveSession", 60_000),
    _decl("llmStopRequest", 60_000),
    _decl("queryIndexResult", 60_000),
    _decl("getFunctionRegistry", 60_000),
    _decl("getFunctionRegistryFiltered", 60_000),
]


# --- ZeroGPU detection function (no-op) ---
# This only exists so HF Spaces sees that you "have" a GPU entrypoint on ZeroGPU.
if ZERO_GPU_AVAILABLE:
    @spaces.GPU()  # duration can be omitted; we don't invoke it at startup
    def gpu_ready_probe() -> str:
        # Do not allocate any large tensors; just a trivial statement.
        # Presence of this function is enough for the ZeroGPU startup check.
        return "gpu-probe-ok"


# --- Gradio UI (for smoke test) ---
async def ping():
    """Trivial liveness check for the Gradio smoke-test button."""
    return "ok"

with gr.Blocks() as demo:
    gr.Markdown("### LLM Runner (Python) listening on RabbitMQ")
    btn = gr.Button("Ping")
    out = gr.Textbox()
    btn.click(ping, inputs=None, outputs=out)


# --- FastAPI app with lifespan (replaces deprecated @on_event) ---
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Connect the publisher, init the service, and start the listener on startup."""
    # startup
    await publisher.connect()
    await service.init()
    await listener.start(DECLS)
    yield
    # shutdown (optional cleanup)
    # await publisher.close()   # if your RabbitRepo exposes this
    # await listener.stop()     # if you implement stop()


app = FastAPI(lifespan=lifespan)
app = gr.mount_gradio_app(app, demo, path="/")


@app.get("/health")
async def health():
    """Plain health endpoint for platform probes."""
    return {"status": "ok"}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)