Spaces:

Mungert
/

GradLLM

Running

App Files Files Community

johnbridges committed on Sep 18

Commit

9f87c0c

1 Parent(s): d757694

.

Browse files

Files changed (1) hide show

app.py +256 -87

app.py CHANGED Viewed

@@ -1,90 +1,259 @@
-# app.py
-import asyncio, logging
-import gradio as gr
 from config import settings
-from rabbit_base import RabbitBase
-from listener import RabbitListenerBase
-from rabbit_repo import RabbitRepo
-from oa_server import OpenAIServers
-#from vllm_backend import VLLMChatBackend, StubImagesBackend
-#from transformers_backend import TransformersChatBackend, StubImagesBackend
-#from hf_backend import HFChatBackend, StubImagesBackend
-from hf_backend import StubImagesBackend
-from timesfm_backend import TimesFMBackend
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
-)
-log = logging.getLogger("app")
-# ----------------- Hugging Face Spaces helpers -----------------
 try:
-    import spaces
-    @spaces.GPU(duration=60)
-    def gpu_entrypoint() -> str:
-        return "gpu: ready"
-except Exception:
-    def gpu_entrypoint() -> str:
-        return "gpu: not available (CPU only)"
-# ----------------- RabbitMQ wiring -----------------
-publisher = RabbitRepo(external_source="openai.mq.server")
-resolver = (lambda name: "direct" if name.startswith("oa.") else settings.RABBIT_EXCHANGE_TYPE)
-base = RabbitBase(exchange_type_resolver=resolver)
-servers = OpenAIServers(
-    publisher,
-    chat_backend=TimesFMBackend(),
-    images_backend=StubImagesBackend()
-)
-handlers = {
-    "oaChatCreate": servers.handle_chat_create,
-    "oaImagesGenerate": servers.handle_images_generate,
-}
-DECLS = [
-    {"ExchangeName": "oa.chat.create", "FuncName": "oaChatCreate",
-     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
-    {"ExchangeName": "oa.images.generate", "FuncName": "oaImagesGenerate",
-     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
-]
-listener = RabbitListenerBase(base, instance_name=settings.RABBIT_INSTANCE_NAME, handlers=handlers)
-# ----------------- Startup init -----------------
-async def _startup_init():
-    try:
-        await base.connect()          # connect to RabbitMQ
-        await listener.start(DECLS)   # start queue listeners
-        return "OpenAI MQ + vLLM: ready"
-    except Exception as e:
-        log.exception("Startup init failed")
-        return f"ERROR: {e}"
-async def ping():
-    return "ok"
-# ----------------- Gradio UI -----------------
-with gr.Blocks(title="OpenAI over RabbitMQ (local vLLM)", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## OpenAI-compatible over RabbitMQ — using vLLM locally inside Space")
-    with gr.Tabs():
-        with gr.Tab("Service"):
-            btn = gr.Button("Ping")
-            out = gr.Textbox(label="Ping result")
-            btn.click(ping, inputs=None, outputs=out)
-            init_status = gr.Textbox(label="Startup status", interactive=False)
-            demo.load(fn=_startup_init, inputs=None, outputs=init_status)
-        with gr.Tab("@spaces.GPU Probe"):
-            gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
-            gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
-            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True, mcp_server=True)

+# timesfm_backend.py
+import time
+import json
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+from backends_base import ChatBackend, ImagesBackend  # ChatBackend for OA server
 from config import settings
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Try to import TimesFM. If not present, we fall back to a naive forecaster.
+# _TIMESFM_AVAILABLE stays False unless the import below succeeds.
+_TIMESFM_AVAILABLE = False
+# NOTE(review): _TFM is assigned here but not referenced by the code visible in this file.
+_TFM = None
 try:
+    # google timesfm 2.5 requires `pip install timesfm`
+    # model class name can be TimesFm (library-dependent)
+    from timesfm import TimesFm  # type: ignore
+    _TIMESFM_AVAILABLE = True
+except Exception as e:
+    # Any exception raised by the import is logged as a warning and leaves
+    # _TIMESFM_AVAILABLE False, so the module still loads without timesfm.
+    logger.warning("timesfm not available (%s) — will use naive fallback.", e)
+def _parse_series(series: Any) -> np.ndarray:
+    """
+    Convert a user-supplied time series into a 1D float32 numpy array.
+    Accepted shapes:
+      - list/tuple of numbers
+      - list/tuple of dicts holding the value under 'y' or 'value'
+      - dict wrapping either of the above under 'values' or 'y'
+    Dict items carrying neither 'y' nor 'value' are skipped.
+    Raises ValueError when the input is missing, empty, of an unsupported
+    type, or contains an entry that cannot be converted to float.
+    """
+    if series is None:
+        raise ValueError("series is required")
+    if isinstance(series, dict):
+        if "values" in series:
+            series = series["values"]
+        elif "y" in series:
+            series = series["y"]
+    if not isinstance(series, (list, tuple)):
+        raise ValueError("series must be a list/tuple or dict with 'values'/'y'")
+    vals: List[float] = []
+    try:
+        if series and isinstance(series[0], dict):
+            # e.g. [{"t": "...", "y": 1.2}, ...] or {"value": ...}
+            for item in series:
+                if "y" in item:
+                    vals.append(float(item["y"]))
+                elif "value" in item:
+                    vals.append(float(item["value"]))
+        else:
+            # numeric list
+            vals = [float(x) for x in series]
+    except TypeError as exc:
+        # float(None), float([...]) and a non-dict mixed into a dict list all
+        # raise TypeError; surface them as the documented ValueError instead.
+        raise ValueError(f"series contains a non-numeric entry: {exc}") from exc
+    if not vals:
+        raise ValueError("series is empty")
+    return np.asarray(vals, dtype=np.float32)
+def _fallback_forecast(y: np.ndarray, horizon: int) -> np.ndarray:
+    """
+    Naive flat forecast that needs nothing beyond numpy.
+    Repeats the mean of the trailing window (the last 4 points, or the whole
+    series when it has fewer than 4) `horizon` times.
+    A non-positive horizon yields an empty float32 array.
+    """
+    if horizon > 0:
+        window = min(4, y.shape[0])
+        level = float(np.mean(y[-window:]))
+        return np.full((horizon,), level, dtype=np.float32)
+    return np.zeros((0,), dtype=np.float32)
+class TimesFMBackend(ChatBackend):
+    """
+    Chat-compatible backend (for oa_server) wrapping TimesFM (if installed).
+    If TimesFM is missing, or its model cannot be created, a naive
+    statistical fallback produces the forecast instead.
+    """
+    def __init__(self,
+                 model_id: Optional[str] = None,
+                 device: Optional[str] = None):
+        """
+        model_id: optional identifier reported in responses
+        device: 'cpu' or 'cuda'; stored on the instance only. TimesFm() is
+                currently constructed without it.
+        """
+        self.model_id = model_id or "google/timesfm-2.5-200m-pytorch"
+        self.device = device or "cpu"
+        self._model = None  # lazy init
+        # Remember a failed init so it is not retried (and its traceback
+        # re-logged) on every single request.
+        self._init_failed = False
+    # ---------- internal ----------
+    def _ensure_model(self):
+        """
+        Create the TimesFM model on first use.
+        No-op when the model already exists, the library is missing, or an
+        earlier attempt failed; forecast() then takes the fallback path.
+        """
+        if self._model is not None or not _TIMESFM_AVAILABLE:
+            return
+        # getattr keeps instances built without __init__ working
+        if getattr(self, "_init_failed", False):
+            return
+        try:
+            # minimal init; adjust kwargs if your installed version needs different args
+            # NOTE(review): the installed timesfm release may require constructor
+            # arguments or a checkpoint -- confirm against the package in use.
+            self._model = TimesFm()  # type: ignore
+            logger.info("TimesFM model initialized.")
+        except Exception as e:
+            logger.exception("Failed to initialize TimesFM; will use fallback. %s", e)
+            self._model = None
+            self._init_failed = True
+    def _make_chunk(self, rid: str, created: int, content: str) -> Dict[str, Any]:
+        """Build the single OpenAI-style chat.completion.chunk carrying `content`."""
+        return {
+            "id": rid,
+            "object": "chat.completion.chunk",
+            "created": created,
+            "model": self.model_id,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"role": "assistant", "content": content},
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+    @staticmethod
+    def _gather_payload(request: Any) -> Dict[str, Any]:
+        """
+        Collect forecast inputs from an OA request.
+        Starts from a shallow copy of the request's top-level keys, then
+        overlays the most recent user message when its content is a JSON
+        object. Unparseable content is ignored (best effort).
+        """
+        if not isinstance(request, dict):
+            return {}
+        payload: Dict[str, Any] = dict(request)
+        msgs = request.get("messages")
+        if not isinstance(msgs, list):
+            return payload
+        last_user = next(
+            (m for m in reversed(msgs) if isinstance(m, dict) and m.get("role") == "user"),
+            None,
+        )
+        content = last_user.get("content") if last_user is not None else None
+        if not isinstance(content, str):
+            return payload
+        text = content.strip()
+        looks_like_json = (text.startswith("{") and text.endswith("}")) or (
+            text.startswith("[") and text.endswith("]")
+        )
+        if not looks_like_json:
+            return payload
+        try:
+            parsed = json.loads(text)
+        except (ValueError, RecursionError):
+            # non-fatal: keep whatever the request itself provided
+            logger.debug("Last user message is not valid JSON; ignoring it.", exc_info=True)
+            return payload
+        if isinstance(parsed, dict):
+            payload.update(parsed)
+        return payload
+    # ---------- public helpers ----------
+    async def forecast(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Unified forecast entrypoint.
+        Expected keys (directly in payload OR nested under 'data' OR 'timeseries'):
+          - series: list of numbers (or list of dicts holding 'y'/'value')
+          - horizon: int (>0)
+          - freq: optional string for metadata only
+        Returns:
+          {
+            "model": "...",
+            "horizon": int,
+            "freq": str|None,
+            "forecast": [floats],
+            "note": str|None
+          }
+        Raises ValueError when horizon is not a positive integer or the
+        series is missing/invalid.
+        """
+        # unwrap if nested
+        if "data" in payload and isinstance(payload["data"], dict):
+            payload = {**payload, **payload["data"]}
+        if "timeseries" in payload and isinstance(payload["timeseries"], dict):
+            payload = {**payload, **payload["timeseries"]}
+        series = payload.get("series")
+        freq = payload.get("freq")
+        try:
+            horizon = int(payload.get("horizon", 0))
+        except (TypeError, ValueError) as exc:
+            # e.g. horizon=None or "abc": report it like any other bad horizon
+            raise ValueError("horizon must be a positive integer") from exc
+        y = _parse_series(series)
+        if horizon <= 0:
+            raise ValueError("horizon must be a positive integer")
+        self._ensure_model()
+        if _TIMESFM_AVAILABLE and self._model is not None:
+            # Use real TimesFM
+            try:
+                # Most TimesFM APIs are batch-oriented; we add a batch dim and remove it later
+                # If your installed version differs (e.g., .predict with signature),
+                # change these two lines accordingly:
+                # NOTE(review): predict() is called synchronously inside this
+                # coroutine -- confirm that blocking the event loop is acceptable.
+                y_batch = y[None, :]
+                preds = self._model.predict(y_batch, horizon=horizon)  # type: ignore
+                # preds shape => (1, horizon) -- assumed, TODO confirm for the installed version
+                fc = np.asarray(preds).reshape(-1).tolist()
+                note = None
+            except Exception as e:
+                logger.exception("TimesFM predict failed; falling back. %s", e)
+                fc = _fallback_forecast(y, horizon).tolist()
+                note = "fallback_used_due_to_predict_error"
+        else:
+            # Fallback path
+            fc = _fallback_forecast(y, horizon).tolist()
+            note = "fallback_used_timesfm_missing"
+        return {
+            "model": self.model_id,
+            "horizon": horizon,
+            "freq": freq,
+            "forecast": fc,
+            "note": note,
+        }
+    # ---------- ChatBackend interface (for oa_server) ----------
+    async def stream(self, request: Dict[str, Any]):
+        """
+        OA-compatible streaming shim:
+        - Extracts forecast inputs from request (or from last user message JSON).
+        - Runs forecast() and yields ONE OpenAI-style chat chunk whose content
+          is a compact JSON string with the forecast result, or
+          {"error": "..."} when the forecast raised.
+        """
+        # One clock read for both fields; nanosecond resolution keeps ids
+        # distinct for requests arriving within the same second.
+        now_ns = time.time_ns()
+        now = now_ns // 1_000_000_000
+        rid = f"chatcmpl-timesfm-{now_ns}"
+        payload = self._gather_payload(request)
+        # run forecast
+        try:
+            result = await self.forecast(payload)
+        except Exception as e:
+            # return an error chunk in OpenAI shape
+            logger.warning("TimesFM forecast request failed: %s", e)
+            err = {"error": str(e)}
+            content = json.dumps(err, separators=(",", ":"), ensure_ascii=False)
+            yield self._make_chunk(rid, now, content)
+            return
+        # success: compact JSON content so your .NET can parse
+        body = {
+            key: result.get(key)
+            for key in ("model", "horizon", "freq", "forecast", "note")
+        }
+        body["backend"] = "timesfm"
+        content = json.dumps(body, separators=(",", ":"), ensure_ascii=False)
+        yield self._make_chunk(rid, now, content)
+# Optional: keep an images stub to satisfy oa_server wiring if needed elsewhere
+class StubImagesBackend(ImagesBackend):
+    """Placeholder images backend: this module does not generate images."""
+    async def generate_b64(self, request: Dict[str, Any]) -> str:
+        """
+        Log a warning and return a fixed base64 string; `request` is ignored.
+        The string presumably encodes a tiny placeholder PNG -- TODO confirm.
+        """
+        placeholder_b64 = (
+            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4BwQACfsD/etCJH0AAAAASUVORK5CYII="
+        )
+        logger.warning("Image generation not supported in TimesFM backend.")
+        return placeholder_b64