File size: 4,794 Bytes
527d73d
bf292d9
 
 
7630510
bf292d9
b2c2f23
527d73d
b2c2f23
 
 
 
bf292d9
7630510
 
 
 
 
 
 
 
 
 
527d73d
bf292d9
 
 
 
 
 
527d73d
bf292d9
 
 
 
527d73d
bf292d9
527d73d
bf292d9
 
 
527d73d
 
bf292d9
527d73d
 
 
 
bf292d9
 
 
 
 
 
 
 
 
 
 
7630510
527d73d
 
 
7630510
527d73d
 
bf292d9
 
 
527d73d
 
 
 
 
7630510
527d73d
7630510
527d73d
7630510
527d73d
7630510
527d73d
7630510
bf292d9
 
7630510
 
 
 
 
 
 
 
 
 
bf292d9
 
 
 
 
 
 
 
 
 
7630510
 
 
 
bf292d9
 
 
7630510
 
 
 
bf292d9
7630510
 
bf292d9
7630510
 
 
527d73d
bf292d9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# app.py
import asyncio
import gradio as gr
from fastapi import FastAPI
from contextlib import asynccontextmanager

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner

# --- Optional ZeroGPU hook ---
# On ZeroGPU hardware the `spaces` package imports cleanly and satisfies the
# Spaces startup check; on CPU hardware the import fails and we run without it.
try:
    import spaces
except Exception:  # broad on purpose: any import failure means "no ZeroGPU"
    spaces = None

ZERO_GPU_AVAILABLE = spaces is not None


# --- Runner factory (stub) ---
class EchoRunner(ILLMRunner):
    """Do-nothing ILLMRunner implementation used for wiring/smoke tests."""

    Type = "EchoRunner"

    async def StartProcess(self, llmServiceObj: dict):
        # Stub: nothing to spawn.
        pass

    async def RemoveProcess(self, sessionId: str):
        # Stub: no per-session state to tear down.
        pass

    async def StopRequest(self, sessionId: str):
        # Stub: nothing running to interrupt.
        pass

    async def SendInputAndGetResponse(self, llmServiceObj: dict):
        # Stub: accepts input and produces no response.
        pass

async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
    """Build the runner for a session payload — currently always the echo stub."""
    return EchoRunner()


# --- Publisher and Service ---
# Module-level singletons: `publisher` sends results back over RabbitMQ and
# `service` orchestrates runner sessions, publishing through `publisher`.
publisher = RabbitRepo(external_source="https://space.external")
service = LLMService(publisher, runner_factory)

# --- Handlers mapping .NET FuncName -> service method ---
# Each payload handler normalizes a missing body to {} before delegating; the
# two registry handlers ignore the payload and pass a fixed "filtered" flag.
async def h_start(data):
    await service.StartProcess(data or {})

async def h_user(data):
    await service.UserInput(data or {})

async def h_remove(data):
    await service.RemoveSession(data or {})

async def h_stop(data):
    await service.StopRequest(data or {})

async def h_qir(data):
    await service.QueryIndexResult(data or {})

async def h_getreg(_):
    await service.GetFunctionRegistry(False)

async def h_getreg_f(_):
    await service.GetFunctionRegistry(True)

# FuncName (as sent by the .NET side) -> coroutine handler.
handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}

# --- Listener wiring (needs base + instance_name) ---
# The listener consumes declared queues and routes each FuncName to the
# matching coroutine in `handlers`.
base = RabbitBase()
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,   # queue prefix like your .NET instance
    handlers=handlers,
)

# Declarations mirror your C# InitRabbitMQObjs()
# Each entry declares one exchange named "<FuncName><SERVICE_ID>"; the two
# session-facing functions get a longer message timeout than the rest.
DECLS = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout_ms,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout_ms in (
        ("llmStartSession", 600_000),
        ("llmUserInput", 600_000),
        ("llmRemoveSession", 60_000),
        ("llmStopRequest", 60_000),
        ("queryIndexResult", 60_000),
        ("getFunctionRegistry", 60_000),
        ("getFunctionRegistryFiltered", 60_000),
    )
]

# --- ZeroGPU detection function (no-op) ---
# This only exists so HF Spaces sees that you "have" a GPU entrypoint on ZeroGPU.
# It is never invoked at startup; the decorated function's presence is enough.
if ZERO_GPU_AVAILABLE:
    @spaces.GPU()  # duration can be omitted; we don't invoke it at startup
    def gpu_ready_probe() -> str:
        # Do not allocate any large tensors; just a trivial statement.
        # Presence of this function is enough for the ZeroGPU startup check.
        return "gpu-probe-ok"


# --- Gradio UI (for smoke test) ---
async def ping():
    """Return a static 'ok' so the UI button can confirm the app is alive."""
    return "ok"

# Minimal smoke-test UI: one button that round-trips through the async
# `ping` handler and shows the result in a textbox.
with gr.Blocks() as demo:
    gr.Markdown("### LLM Runner (Python) listening on RabbitMQ")
    btn = gr.Button("Ping")
    out = gr.Textbox()
    btn.click(ping, inputs=None, outputs=out)

# --- FastAPI app with lifespan (replaces deprecated @on_event) ---
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Bring up messaging on startup; everything after `yield` runs at shutdown."""
    # Order matters: the publisher connection must exist before the service
    # initializes, and the listener starts consuming last.
    await publisher.connect()
    await service.init()
    await listener.start(DECLS)
    yield
    # shutdown (optional cleanup)
    # await publisher.close()  # if your RabbitRepo exposes this
    # await listener.stop()    # if you implement stop()

# Compose the ASGI app: FastAPI owns the lifespan; Gradio is mounted at "/".
app = FastAPI(lifespan=lifespan)
app = gr.mount_gradio_app(app, demo, path="/")

@app.get("/health")
async def health():
    """Liveness probe endpoint, independent of the Gradio UI mounted at '/'."""
    return {"status": "ok"}

if __name__ == "__main__":
    # Local development entry point; HF Spaces launches the server itself.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)