Commit 6195aba (1 parent: 3692feb)

remove unused files

Files changed:
- cloud_event.py       +0 -0
- factory.py           +0 -8
- function_tracker.py  +0 -44
- runners/base.py      +0 -17
- runners/echo.py      +0 -46
- service.py           +0 -254
- streaming.py         +0 -22
cloud_event.py
DELETED
File without changes (the file was empty; nothing to diff)
factory.py
DELETED
@@ -1,8 +0,0 @@
-# factories.py
-from runners.echo import EchoRunner
-from runners.base import ILLMRunner
-from typing import Dict, Any
-
-async def default_runner_factory(context: Dict[str, Any]) -> ILLMRunner:
-    # choose runner by context["LLMRunnerType"] if you need variants
-    return EchoRunner(publisher=context["_publisher"], settings=context["_settings"])
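For reference, a minimal sketch of how this factory is consumed: it receives a plain context dict and returns a runner. The stub publisher, the "LLMRunnerType" value, and passing None for settings are illustrative assumptions; LLMService.StartProcess (below) injects the real "_publisher" and "_settings" keys alongside the full message dict.

    import asyncio
    from factory import default_runner_factory  # assumes the deleted module is on the path

    class StubPublisher:
        async def publish(self, queue, obj):  # same shape as RabbitRepo.publish in these diffs
            print(queue, obj)

    async def main():
        runner = await default_runner_factory({
            "LLMRunnerType": "TurboLLM",    # hypothetical payload field
            "_publisher": StubPublisher(),  # injected by LLMService.StartProcess
            "_settings": None,              # EchoRunner only stores this
        })
        print(runner.Type)  # -> "TurboLLM"

    asyncio.run(main())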
function_tracker.py
DELETED
@@ -1,44 +0,0 @@
-# function_tracker.py
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import Dict, List
-import random
-import logging
-logger = logging.getLogger(__name__)
-
-@dataclass
-class TrackedCall:
-    FunctionCallId: str
-    FunctionName: str
-    IsProcessed: bool = False
-    Payload: str = ""
-
-class FunctionCallTracker:
-    def __init__(self) -> None:
-        self._by_msg: Dict[str, Dict[str, TrackedCall]] = {}
-
-    @staticmethod
-    def gen_id() -> str:
-        return f"call_{random.randint(10_000_000, 99_999_999)}"
-
-    def add(self, message_id: str, fn_name: str, payload: str) -> str:
-        call_id = self.gen_id()
-        self._by_msg.setdefault(message_id, {})[call_id] = TrackedCall(call_id, fn_name, False, payload)
-        return call_id
-
-    def mark_processed(self, message_id: str, call_id: str, payload: str = "") -> None:
-        m = self._by_msg.get(message_id, {})
-        if call_id in m:
-            m[call_id].IsProcessed = True
-            if payload:
-                m[call_id].Payload = payload
-
-    def all_processed(self, message_id: str) -> bool:
-        m = self._by_msg.get(message_id, {})
-        return bool(m) and all(x.IsProcessed for x in m.values())
-
-    def processed_list(self, message_id: str) -> List[TrackedCall]:
-        return list(self._by_msg.get(message_id, {}).values())
-
-    def clear(self, message_id: str) -> None:
-        self._by_msg.pop(message_id, None)
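The tracker's lifecycle, as a minimal sketch (the message id, function name, and payloads are made-up examples; the module is assumed importable). One quirk worth noting: despite its name, processed_list returns every tracked call for the message, processed or not.

    from function_tracker import FunctionCallTracker

    tracker = FunctionCallTracker()
    call_id = tracker.add("msg-1", "get_weather", payload='{"city": "Oslo"}')

    tracker.mark_processed("msg-1", call_id, payload='{"temp": "7C"}')
    assert tracker.all_processed("msg-1")

    for call in tracker.processed_list("msg-1"):  # returns all tracked calls, despite the name
        print(call.FunctionName, call.IsProcessed, call.Payload)

    tracker.clear("msg-1")  # drop all state for this message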
runners/base.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
from abc import ABC, abstractmethod
|
| 2 |
-
from typing import Any
|
| 3 |
-
|
| 4 |
-
class ILLMRunner(ABC):
|
| 5 |
-
Type: str = "BaseLLM"
|
| 6 |
-
IsEnabled: bool = True
|
| 7 |
-
IsStateStarting: bool = False
|
| 8 |
-
IsStateFailed: bool = False
|
| 9 |
-
|
| 10 |
-
@abstractmethod
|
| 11 |
-
async def StartProcess(self, llmServiceObj: dict) -> None: ...
|
| 12 |
-
@abstractmethod
|
| 13 |
-
async def RemoveProcess(self, sessionId: str) -> None: ...
|
| 14 |
-
@abstractmethod
|
| 15 |
-
async def StopRequest(self, sessionId: str) -> None: ...
|
| 16 |
-
@abstractmethod
|
| 17 |
-
async def SendInputAndGetResponse(self, llmServiceObj: dict) -> None: ...
|
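A concrete runner subclasses ILLMRunner and overrides the four async methods; EchoRunner below is the in-repo example. A hypothetical no-op conformer (name and behavior invented here, useful as a test double):

    from runners.base import ILLMRunner

    class NoOpRunner(ILLMRunner):
        # illustrative only; not from the repo
        Type = "NoOpLLM"

        async def StartProcess(self, llmServiceObj: dict) -> None:
            pass

        async def RemoveProcess(self, sessionId: str) -> None:
            pass

        async def StopRequest(self, sessionId: str) -> None:
            pass

        async def SendInputAndGetResponse(self, llmServiceObj: dict) -> None:
            pass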
runners/echo.py
DELETED
@@ -1,46 +0,0 @@
-# runners/echo.py
-from __future__ import annotations
-from typing import Any, Dict, Optional
-from .base import ILLMRunner
-from models import LLMServiceObj
-from function_tracker import FunctionCallTracker
-import logging
-logger = logging.getLogger(__name__)
-
-class EchoRunner(ILLMRunner):
-    Type = "TurboLLM"
-    IsEnabled = True
-    IsStateStarting = False
-    IsStateFailed = False
-
-    def __init__(self, publisher, settings):
-        self._pub = publisher
-        self._settings = settings
-        self._tracker = FunctionCallTracker()
-
-    async def StartProcess(self, llmServiceObj: dict) -> None:
-        logger.info(f"StartProcess called with: {llmServiceObj}")
-        # pretend to "warm up"
-        pass
-
-    async def RemoveProcess(self, sessionId: str) -> None:
-        logger.info(f"RemoveProcess called for session: {sessionId}")
-        # nothing to clean here
-        pass
-
-    async def StopRequest(self, sessionId: str) -> None:
-        logger.info(f"StopRequest called for session: {sessionId}")
-        # no streaming loop to stop in echo
-        pass
-
-    async def SendInputAndGetResponse(self, llmServiceObj: dict) -> None:
-        logger.info(f"SendInputAndGetResponse called with: {llmServiceObj}")
-        llm = LLMServiceObj(**llmServiceObj)
-        if llm.UserInput.startswith("<|START_AUDIO|>") or llm.UserInput.startswith("<|STOP_AUDIO|>"):
-            logger.debug("Audio input detected, ignoring in echo.")
-            return
-
-        # Echo behavior (match UI format)
-        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<User:> {llm.UserInput}\n\n"))
-        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<Assistant:> You said: {llm.UserInput}\n"))
-        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage="<end-of-line>"))
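Driven directly, EchoRunner publishes three llmServiceMessage events per input: the echoed user line, the assistant reply, and an "<end-of-line>" terminator. A sketch with a capturing stub publisher; the minimal one-field payload assumes LLMServiceObj accepts a bare UserInput, whereas real messages carry more fields.

    import asyncio
    from runners.echo import EchoRunner

    class CapturePublisher:
        def __init__(self):
            self.sent = []

        async def publish(self, queue, obj):
            self.sent.append((queue, obj))

    async def main():
        pub = CapturePublisher()
        runner = EchoRunner(publisher=pub, settings=None)
        await runner.SendInputAndGetResponse({"UserInput": "ping"})  # minimal, hypothetical payload
        for queue, msg in pub.sent:
            print(queue, repr(msg.LlmMessage))

    asyncio.run(main())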
service.py
DELETED
@@ -1,254 +0,0 @@
-# service.py
-import asyncio
-from dataclasses import dataclass
-from typing import Any, Dict, Optional, Callable, Awaitable
-
-from config import settings
-from models import LLMServiceObj, ResultObj
-from rabbit_repo import RabbitRepo
-from runners.base import ILLMRunner
-from message_helper import success as _ok, error as _err
-import logging
-logger = logging.getLogger(__name__)
-
-@dataclass
-class _Session:
-    Runner: Optional[ILLMRunner]
-    FullSessionId: str
-
-
-class LLMService:
-    """
-    Python/Gradio equivalent of your .NET LLMService.
-    Keeps identical field names and queue semantics when talking to RabbitMQ.
-    """
-    def __init__(
-        self,
-        publisher: RabbitRepo,
-        runner_factory: Callable[[Dict[str, Any]], Awaitable[ILLMRunner]],
-    ):
-        self._pub: RabbitRepo = publisher
-        self._runner_factory = runner_factory  # async factory: dict -> ILLMRunner
-        self._sessions: Dict[str, _Session] = {}
-        self._ready = asyncio.Event()
-        self._ready.set()  # call clear()/set() if you preload history
-        self._service_id_lc = settings.SERVICE_ID.lower()
-
-    async def init(self) -> None:
-        """Hook to preload history/sessions; call self._ready.set() when finished."""
-        pass
-
-    # ---------------------------- helpers ----------------------------
-
-    def _to_model(self, data: Any) -> LLMServiceObj:
-        # Defensive: ensure required nested objects are dicts, not None
-        if data.get("FunctionCallData") is None:
-            data["FunctionCallData"] = {}
-        if data.get("UserInfo") is None:
-            data["UserInfo"] = {}
-        return LLMServiceObj(**data)
-
-    async def _emit_result(
-        self,
-        obj: LLMServiceObj | Dict[str, Any],
-        message: str,
-        success: bool,
-        queue: str,
-        *,
-        check_system: bool = False,
-        include_llm_message: bool = True,
-    ) -> None:
-        """
-        Build a ResultObj-style message on the wire, mirroring your .NET usage.
-        check_system=True -> don't publish if obj.IsSystemLlm is True (matches your rule).
-        """
-        llm = obj if isinstance(obj, LLMServiceObj) else LLMServiceObj(**obj)
-
-        llm.ResultMessage = message
-        llm.ResultSuccess = success
-        if include_llm_message:
-            llm.LlmMessage = _ok(message) if success else _err(message)
-
-        if check_system and llm.IsSystemLlm:
-            return
-
-        # You publish LLMServiceObj on "llmServiceMessage"/"llmSessionMessage" in .NET
-        await self._pub.publish(queue, llm)
-
-    def _session_for(self, session_id: str) -> Optional[_Session]:
-        return self._sessions.get(session_id)
-
-    # ---------------------------- API methods ----------------------------
-
-    async def StartProcess(self, payload: Any) -> None:
-        llm = self._to_model(payload)
-
-        # Validate critical fields
-        if not llm.RequestSessionId:
-            await self._emit_result(llm, "Error: RequestSessionId is required.", False, "llmServiceMessage")
-            return
-        if not llm.LLMRunnerType:
-            await self._emit_result(llm, "Error: LLMRunnerType is required.", False, "llmServiceMessage")
-            return
-
-        # Construct session id like C#: RequestSessionId + "_" + LLMRunnerType
-        session_id = f"{llm.RequestSessionId}_{llm.LLMRunnerType}"
-        llm.SessionId = session_id
-
-        # Wait ready (max 120s) exactly like the C# logic
-        try:
-            await asyncio.wait_for(self._ready.wait(), timeout=120)
-        except asyncio.TimeoutError:
-            await self._emit_result(
-                llm, "Timed out waiting for initialization.", False, "llmServiceMessage", check_system=True
-            )
-            return
-
-        sess = self._session_for(session_id)
-        runner = sess.Runner if sess else None
-        create_new = (runner is None) or getattr(runner, "IsStateFailed", False)
-
-        if create_new:
-            # Remove previous runner if exists
-            if runner:
-                try:
-                    await runner.RemoveProcess(session_id)
-                except Exception:
-                    pass
-
-            # Create runner from factory (pass a plain dict for decoupling)
-            runner = await self._runner_factory({
-                **llm.model_dump(by_alias=True),
-                "_publisher": self._pub,
-                "_settings": settings,
-            })
-            if not runner.IsEnabled:
-                await self._emit_result(
-                    llm,
-                    f"{llm.LLMRunnerType} {settings.SERVICE_ID} not started as it is disabled.",
-                    True,
-                    "llmServiceMessage",
-                )
-                return
-
-            await self._emit_result(
-                llm, f"Starting {runner.Type} {settings.SERVICE_ID} Expert", True, "llmServiceMessage", check_system=True
-            )
-
-            await runner.StartProcess(llm.model_dump(by_alias=True))
-
-        self._sessions[session_id] = _Session(Runner=runner, FullSessionId=session_id)
-
-        # Friendly greeting for your renamed service
-        if self._service_id_lc in {"monitor", "gradllm"}:
-            await self._emit_result(
-                llm,
-                f"Hi i'm {runner.Type} your {settings.SERVICE_ID} Assistant. How can I help you.",
-                True,
-                "llmServiceMessage",
-                check_system=True,
-            )
-
-        # Notify "started" (full LLMServiceObj)
-        await self._pub.publish("llmServiceStarted", llm)
-
-    async def RemoveSession(self, payload: Any) -> None:
-        llm = self._to_model(payload)
-        base = (llm.SessionId or "").split("_")[0]
-        if not base:
-            await self._emit_result(llm, "Error: SessionId is required to remove sessions.", False, "llmServiceMessage")
-            return
-
-        targets = [k for k in list(self._sessions.keys()) if k.startswith(base + "_")]
-        msgs: list[str] = []
-        ok = True
-
-        for sid in targets:
-            s = self._sessions.get(sid)
-            if not s or not s.Runner:
-                continue
-            try:
-                await s.Runner.RemoveProcess(sid)
-                s.Runner = None
-                self._sessions.pop(sid, None)  # ← free the entry
-                msgs.append(sid)
-            except Exception as e:
-                ok = False
-                msgs.append(f"Error {sid}: {e}")
-
-        if ok:
-            await self._emit_result(
-                llm,
-                f"Success: Removed sessions for {' '.join(msgs) if msgs else '(none)'}",
-                True,
-                "llmSessionMessage",
-                check_system=True,
-            )
-        else:
-            await self._emit_result(llm, " ".join(msgs), False, "llmServiceMessage")
-
-    async def StopRequest(self, payload: Any) -> None:
-        llm = self._to_model(payload)
-        sid = llm.SessionId or ""
-        s = self._session_for(sid)
-        if not s or not s.Runner:
-            await self._emit_result(llm, f"Error: Runner missing for session {sid}.", False, "llmServiceMessage")
-            return
-
-        await s.Runner.StopRequest(sid)
-        await self._emit_result(
-            llm,
-            f"Success {s.Runner.Type} {settings.SERVICE_ID} Assistant output has been halted",
-            True,
-            "llmServiceMessage",
-            check_system=True,
-        )
-
-    async def UserInput(self, payload: Any) -> None:
-        llm = self._to_model(payload)
-        sid = llm.SessionId or ""
-        s = self._session_for(sid)
-        if not s or not s.Runner:
-            await self._emit_result(llm, f"Error: SessionId {sid} has no running process.", False, "llmServiceMessage")
-            return
-
-        r: ILLMRunner = s.Runner
-        if getattr(r, "IsStateStarting", False):
-            await self._emit_result(llm, "Please wait, the assistant is starting...", False, "llmServiceMessage")
-            return
-        if getattr(r, "IsStateFailed", False):
-            await self._emit_result(llm, "The Assistant is stopped. Try reloading.", False, "llmServiceMessage")
-            return
-
-        # Let runner push partials itself if desired; we still return a small ack
-        await r.SendInputAndGetResponse(llm.model_dump(by_alias=True))
-
-    async def QueryIndexResult(self, payload: Any) -> None:
-        try:
-            data = payload if isinstance(payload, dict) else {}
-            outputs = data.get("QueryResults") or []
-            rag_data = "\n".join([x.get("Output", "") for x in outputs if isinstance(x, dict)])
-
-            # NEW: show RAG to the chat like tool output
-            await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<Function Response:> {rag_data}\n\n"))
-            await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage="</functioncall-complete>"))
-
-            # keep your existing summary object (nice for observers/metrics)
-            await self._pub.publish(
-                "llmServiceMessage",
-                ResultObj(Message=data.get("Message", ""), Success=bool(data.get("Success", False)), Data=rag_data),
-            )
-        except Exception as e:
-            await self._pub.publish("llmServiceMessage", ResultObj(Message=str(e), Success=False))
-
-    async def GetFunctionRegistry(self, filtered: bool = False) -> None:
-        """
-        Wire up to your real registry when ready.
-        For now, mimic your success message payload.
-        """
-        catalog = "{}"  # replace with real JSON
-        msg = f"Success : Got GetFunctionCatalogJson : {catalog}"
-        await self._pub.publish(
-            "llmServiceMessage",
-            ResultObj(Message=msg, Success=True),
-        )
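End to end, the service is constructed with a publisher and the async runner factory, then driven by the queue handlers. A hypothetical wiring sketch; RabbitRepo's constructor is not shown in this commit, so a stub stands in, and the minimal payloads assume LLMServiceObj tolerates missing optional fields (the field names follow the validation in StartProcess):

    import asyncio
    from factory import default_runner_factory
    from service import LLMService

    class StubPublisher:  # stands in for RabbitRepo
        async def publish(self, queue, obj):
            print(queue, obj)

    async def main():
        svc = LLMService(StubPublisher(), default_runner_factory)
        await svc.init()
        await svc.StartProcess({"RequestSessionId": "abc", "LLMRunnerType": "TurboLLM"})
        # SessionId is RequestSessionId + "_" + LLMRunnerType, as built in StartProcess
        await svc.UserInput({"SessionId": "abc_TurboLLM", "UserInput": "hello"})

    asyncio.run(main())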
streaming.py
DELETED
@@ -1,22 +0,0 @@
-# streaming.py
-import asyncio
-import logging
-logger = logging.getLogger(__name__)
-
-async def stream_in_chunks(publish, exchange: str, llm_obj_builder, text: str,
-                           batch_size: int = 3, max_chars: int = 100,
-                           base_delay_ms: int = 30, per_char_ms: int = 2) -> None:
-    seps = set(" ,!?{}.:;\n")
-    buf, parts, count = [], [], 0
-    for ch in text:
-        parts.append(ch)
-        if ch in seps:
-            buf.append("".join(parts)); parts.clear(); count += 1
-            if count >= batch_size or sum(len(x) for x in buf) >= max_chars:
-                o = llm_obj_builder("".join(buf))
-                await publish(exchange, o)
-                await asyncio.sleep((base_delay_ms + per_char_ms * sum(len(x) for x in buf))/1000)
-                buf.clear(); count = 0
-    if parts: buf.append("".join(parts))
-    if buf:
-        await publish(exchange, llm_obj_builder("".join(buf)))
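The helper splits text at separator characters, flushes once batch_size separators or max_chars characters accumulate, and sleeps in proportion to the flushed length to simulate typing. A self-contained sketch, assuming streaming.py is on the path; the identity builder stands in for the LLMServiceObj wrapper the real callers would pass:

    import asyncio
    from streaming import stream_in_chunks

    async def fake_publish(exchange, obj):
        print(f"[{exchange}] {obj!r}")

    asyncio.run(stream_in_chunks(
        fake_publish, "llmServiceMessage",
        lambda chunk: chunk,  # real code would build an LLMServiceObj here
        "Hello there, world! This is a streamed reply.",
        batch_size=2, max_chars=40,
    ))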