Spaces:

vmjn
/

kronos-forecast

Sleeping

App Files Files Community

vmjn committed on Apr 13

Commit

3e07fe0

verified ·

1 Parent(s): c4c3637

add Chronos, TimesFM, FinBERT endpoints — 4-model inference Space

Browse files

Files changed (2) hide show

Dockerfile +4 -1
app.py +256 -77

Dockerfile CHANGED Viewed

@@ -26,7 +26,10 @@ RUN pip install --user --no-cache-dir \
     "websockets>=13.0" \
     "einops>=0.7" \
     "safetensors>=0.4" \
-    "tqdm>=4.66"
 COPY --chown=user . /home/user/app

     "websockets>=13.0" \
     "einops>=0.7" \
     "safetensors>=0.4" \
+    "tqdm>=4.66" \
+    "transformers>=4.40,<5.0" \
+    "chronos-forecasting>=1.5.2" \
+    "timesfm[torch]>=1.3.0"
 COPY --chown=user . /home/user/app

app.py CHANGED Viewed

@@ -1,99 +1,98 @@
-"""Kronos-small financial forecast — minimal MCP-friendly wrapper."""
-import os
-import pandas as pd
 import numpy as np
 import torch
 import yfinance as yf
 import gradio as gr
-from model import Kronos, KronosTokenizer, KronosPredictor
-# Kronos-small: 24.7M params, fits free CPU Space comfortably.
-TOKENIZER_ID = "NeoQuasar/Kronos-Tokenizer-base"
-MODEL_ID = "NeoQuasar/Kronos-small"
-DEVICE = "cpu"
-MAX_CONTEXT = 512
-_predictor = None
-def get_predictor():
-    global _predictor
-    if _predictor is None:
-        tok = KronosTokenizer.from_pretrained(TOKENIZER_ID)
-        mdl = Kronos.from_pretrained(MODEL_ID)
-        _predictor = KronosPredictor(mdl, tok, device=DEVICE, max_context=MAX_CONTEXT)
-    return _predictor
-def _infer_freq(symbol: str) -> str:
-    # Indian MFs and some indices only have daily data on yfinance.
-    return "1d"
-def forecast(symbol: str, lookback_days: int = 180, pred_days: int = 30) -> dict:
-    """Run Kronos forecast for a symbol and return direction + predicted % change.
-    Args:
-        symbol: yfinance ticker (e.g. 'RELIANCE.NS', 'VOO', 'GOLDIETF.NS').
-        lookback_days: historical window (default 180, max 500).
-        pred_days: forecast horizon in days (default 30).
-    Returns:
-        dict with direction (+1/-1/0), pct_change (%), last_close, predicted_close,
-        n_lookback, model, status.
-    """
     symbol = (symbol or "").strip().upper()
     if not symbol:
         return {"status": "error", "error": "empty symbol"}
-    lookback_days = int(max(32, min(lookback_days or 180, 500)))
-    pred_days = int(max(1, min(pred_days or 30, 90)))
     try:
-        df = yf.download(symbol, period=f"{lookback_days + 10}d",
-                         interval="1d", progress=False, auto_adjust=False)
-        if df is None or df.empty or len(df) < 32:
-            return {"status": "error", "error": f"no data for {symbol}", "n_lookback": 0}
-        # flatten multiindex columns if present
-        if isinstance(df.columns, pd.MultiIndex):
-            df.columns = df.columns.get_level_values(0)
-        df = df.reset_index()
-        df.columns = [str(c).lower() for c in df.columns]
-        df["timestamps"] = pd.to_datetime(df["date"])
-        kdf = df[["timestamps", "open", "high", "low", "close", "volume"]].copy().tail(lookback_days)
-        kdf = kdf.dropna().reset_index(drop=True)
         if len(kdf) < 32:
-            return {"status": "error", "error": "insufficient clean data", "n_lookback": len(kdf)}
         x_df = kdf[["open", "high", "low", "close", "volume"]].copy()
         x_df["amount"] = x_df["close"] * x_df["volume"]
         x_timestamp = kdf["timestamps"]
-        # build future timestamps: business days
         last = x_timestamp.iloc[-1]
         y_timestamp = pd.Series(pd.bdate_range(start=last + pd.Timedelta(days=1), periods=pred_days))
-        predictor = get_predictor()
-        pred_df = predictor.predict(
             df=x_df, x_timestamp=x_timestamp, y_timestamp=y_timestamp,
             pred_len=pred_days, T=1.0, top_p=0.9, sample_count=1, verbose=False,
         )
         last_close = float(kdf["close"].iloc[-1])
         pred_close = float(pred_df["close"].iloc[-1])
         pct = (pred_close - last_close) / last_close * 100.0
-        direction = 1 if pct > 0.5 else (-1 if pct < -0.5 else 0)
         return {
-            "status": "ok",
-            "symbol": symbol,
-            "model": MODEL_ID,
-            "last_close": round(last_close, 4),
-            "predicted_close": round(pred_close, 4),
-            "pct_change": round(pct, 3),
-            "direction": direction,
-            "n_lookback": int(len(kdf)),
-            "pred_days": pred_days,
-            "pred_first_close": round(float(pred_df["close"].iloc[0]), 4),
             "pred_mean_close": round(float(pred_df["close"].mean()), 4),
             "pred_min_close": round(float(pred_df["close"].min()), 4),
             "pred_max_close": round(float(pred_df["close"].max()), 4),
@@ -102,18 +101,198 @@ def forecast(symbol: str, lookback_days: int = 180, pred_days: int = 30) -> dict
         return {"status": "error", "error": f"{type(e).__name__}: {e}", "symbol": symbol}
-demo = gr.Interface(
-    fn=forecast,
-    inputs=[
-        gr.Textbox(label="Symbol (yfinance)", value="RELIANCE.NS"),
-        gr.Slider(32, 500, value=180, step=1, label="Lookback days"),
-        gr.Slider(1, 90, value=30, step=1, label="Prediction days"),
-    ],
-    outputs=gr.JSON(label="Kronos forecast"),
-    title="Kronos-small forecast",
-    description="Direction + predicted % change from Kronos (finance-native foundation model).",
-    api_name="forecast",
-)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)

+"""Multi-model Investment OS inference Space.
+Endpoints (Gradio + MCP):
+- /forecast           — Kronos-small (finance-native candlestick foundation model)
+- /forecast_chronos   — amazon/chronos-bolt-tiny (generic TSFM, CPU-fast)
+- /forecast_timesfm   — google/timesfm-2.5-200m-pytorch (Google TSFM)
+- /score_sentiment    — ProsusAI/finbert (financial sentiment)
+All models lazy-loaded on first call. CPU-only.
+"""
+from __future__ import annotations
 import numpy as np
+import pandas as pd
 import torch
 import yfinance as yf
 import gradio as gr
+# -----------------------------------------------------------------------------
+# Shared: yfinance OHLC loader
+# -----------------------------------------------------------------------------
+def _load_ohlc(symbol: str, lookback_days: int) -> pd.DataFrame:
+    """Download daily bars for ``symbol`` via yfinance and normalise the frame.
+
+    Requests ``lookback_days + 10`` days at a 1-day interval, keeps only the
+    first level of MultiIndex columns, lower-cases the column names and adds a
+    ``timestamps`` column parsed from ``date``.
+
+    Returns:
+        At most the last ``lookback_days`` rows that have no missing values,
+        restricted to whichever of timestamps/open/high/low/close/volume are
+        present. An empty DataFrame is returned when the download is empty or
+        has no ``date`` column after the index reset.
+    """
+    raw = yf.download(
+        symbol,
+        period=f"{lookback_days + 10}d",
+        interval="1d",
+        progress=False,
+        auto_adjust=False,
+    )
+    if raw is None or raw.empty:
+        return pd.DataFrame()
+
+    if isinstance(raw.columns, pd.MultiIndex):
+        # Keep only the first column level.
+        raw.columns = raw.columns.get_level_values(0)
+
+    frame = raw.reset_index()
+    frame.columns = [str(name).lower() for name in frame.columns]
+    if "date" not in frame.columns:
+        return pd.DataFrame()
+
+    frame["timestamps"] = pd.to_datetime(frame["date"])
+    wanted = ("timestamps", "open", "high", "low", "close", "volume")
+    present = [name for name in wanted if name in frame.columns]
+    cleaned = frame[present].dropna()
+    return cleaned.tail(lookback_days).reset_index(drop=True)
+def _direction(pct: float) -> int:
+    """Map a percentage change to +1 (above 0.5), -1 (below -0.5) or 0 otherwise."""
+    if pct > 0.5:
+        return 1
+    if pct < -0.5:
+        return -1
+    return 0
+def _clamp(lb, pd_, min_lb=32, max_lb=500, max_pred=90):
+    """Coerce and bound the lookback window and the prediction horizon.
+
+    Args:
+        lb: requested lookback; falsy values (None, 0, "") mean 180.
+        pd_: requested horizon; falsy values mean 30.
+        min_lb: lower bound for the lookback.
+        max_lb: upper bound for the lookback.
+        max_pred: upper bound for the horizon (the lower bound is 1).
+
+    Returns:
+        ``(lookback, horizon)`` as ints inside their bounds.
+
+    Values that cannot be converted to int (non-numeric strings, NaN, inf,
+    unsupported types) fall back to the same defaults as falsy values instead
+    of raising, because the endpoint functions call this before their ``try``.
+    """
+    def _as_int(value, default):
+        try:
+            return int(value or default)
+        except (TypeError, ValueError, OverflowError):
+            return default
+
+    lookback = max(min_lb, min(_as_int(lb, 180), max_lb))
+    horizon = max(1, min(_as_int(pd_, 30), max_pred))
+    return (int(lookback), int(horizon))
+# -----------------------------------------------------------------------------
+# Kronos — NeoQuasar/Kronos-small
+# -----------------------------------------------------------------------------
+from model import Kronos, KronosTokenizer, KronosPredictor
+KRONOS_MODEL_ID = "NeoQuasar/Kronos-small"
+KRONOS_TOKENIZER_ID = "NeoQuasar/Kronos-Tokenizer-base"
+_kronos = None
+def _get_kronos():
+    """Return the shared KronosPredictor, building it on the first call.
+
+    The tokenizer and model are loaded from KRONOS_TOKENIZER_ID and
+    KRONOS_MODEL_ID; the predictor is created for CPU with max_context=512 and
+    cached in the module-level ``_kronos``.
+    """
+    global _kronos
+    if _kronos is not None:
+        return _kronos
+    tokenizer = KronosTokenizer.from_pretrained(KRONOS_TOKENIZER_ID)
+    network = Kronos.from_pretrained(KRONOS_MODEL_ID)
+    _kronos = KronosPredictor(network, tokenizer, device="cpu", max_context=512)
+    return _kronos
+def forecast(symbol: str, lookback_days: int = 180, pred_days: int = 30) -> dict:
+    """Kronos-small (finance-native) forecast. Returns direction + % change."""
     symbol = (symbol or "").strip().upper()
     if not symbol:
         return {"status": "error", "error": "empty symbol"}
+    lookback_days, pred_days = _clamp(lookback_days, pred_days)
     try:
+        kdf = _load_ohlc(symbol, lookback_days)
         if len(kdf) < 32:
+            return {"status": "error", "error": f"insufficient data for {symbol}", "n_lookback": len(kdf)}
         x_df = kdf[["open", "high", "low", "close", "volume"]].copy()
         x_df["amount"] = x_df["close"] * x_df["volume"]
         x_timestamp = kdf["timestamps"]
         last = x_timestamp.iloc[-1]
         y_timestamp = pd.Series(pd.bdate_range(start=last + pd.Timedelta(days=1), periods=pred_days))
+        pred_df = _get_kronos().predict(
             df=x_df, x_timestamp=x_timestamp, y_timestamp=y_timestamp,
             pred_len=pred_days, T=1.0, top_p=0.9, sample_count=1, verbose=False,
         )
         last_close = float(kdf["close"].iloc[-1])
         pred_close = float(pred_df["close"].iloc[-1])
         pct = (pred_close - last_close) / last_close * 100.0
         return {
+            "status": "ok", "symbol": symbol, "model": KRONOS_MODEL_ID,
+            "last_close": round(last_close, 4), "predicted_close": round(pred_close, 4),
+            "pct_change": round(pct, 3), "direction": _direction(pct),
+            "n_lookback": int(len(kdf)), "pred_days": pred_days,
             "pred_mean_close": round(float(pred_df["close"].mean()), 4),
             "pred_min_close": round(float(pred_df["close"].min()), 4),
             "pred_max_close": round(float(pred_df["close"].max()), 4),
         return {"status": "error", "error": f"{type(e).__name__}: {e}", "symbol": symbol}
+# -----------------------------------------------------------------------------
+# Chronos-bolt-tiny
+# -----------------------------------------------------------------------------
+CHRONOS_MODEL_ID = "amazon/chronos-bolt-tiny"
+_chronos = None
+def _get_chronos():
+    """Return the shared Chronos pipeline, loading it on the first call.
+
+    ``chronos`` is imported here rather than at module import time; the
+    pipeline is loaded from CHRONOS_MODEL_ID on CPU in float32 and cached in
+    the module-level ``_chronos``.
+    """
+    global _chronos
+    if _chronos is not None:
+        return _chronos
+    from chronos import BaseChronosPipeline
+    _chronos = BaseChronosPipeline.from_pretrained(
+        CHRONOS_MODEL_ID,
+        device_map="cpu",
+        torch_dtype=torch.float32,
+    )
+    return _chronos
+def forecast_chronos(symbol: str, lookback_days: int = 180, pred_days: int = 30) -> dict:
+    """Forecast close prices for ``symbol`` with Chronos-bolt-tiny.
+
+    Args:
+        symbol: yfinance ticker; surrounding whitespace is stripped and the
+            value is upper-cased.
+        lookback_days: history window, bounded by ``_clamp``.
+        pred_days: forecast horizon, bounded by ``_clamp``.
+
+    Returns:
+        On success a dict with status "ok", the last observed close, the
+        median forecast at the final step, the % change between them, a
+        direction flag, and the mean / 0.1 / 0.9 quantile summaries.
+        On failure a dict with status "error" and a message; exceptions raised
+        while loading data or predicting are reported this way, not raised.
+    """
+    symbol = (symbol or "").strip().upper()
+    if not symbol:
+        return {"status": "error", "error": "empty symbol"}
+    lookback_days, pred_days = _clamp(lookback_days, pred_days)
+    try:
+        bars = _load_ohlc(symbol, lookback_days)
+        n_rows = len(bars)
+        if n_rows < 32:
+            return {"status": "error", "error": f"insufficient data for {symbol}", "n_lookback": n_rows}
+        closes = bars["close"]
+        quantiles, mean = _get_chronos().predict_quantiles(
+            context=torch.tensor(closes.values, dtype=torch.float32),
+            prediction_length=pred_days,
+            quantile_levels=[0.1, 0.5, 0.9],
+        )
+        # Series 0 of the batch; last-axis positions 0/1/2 are read as the
+        # 0.1 / 0.5 / 0.9 levels requested above.
+        low, median, high = (quantiles[0, :, level].cpu().numpy() for level in range(3))
+        mean_np = mean[0].cpu().numpy()
+        last_close = float(closes.iloc[-1])
+        pred_close = float(median[-1])
+        pct = (pred_close - last_close) / last_close * 100.0
+        result = {
+            "status": "ok",
+            "symbol": symbol,
+            "model": CHRONOS_MODEL_ID,
+            "last_close": round(last_close, 4),
+            "predicted_close": round(pred_close, 4),
+            "pct_change": round(pct, 3),
+            "direction": _direction(pct),
+            "n_lookback": int(n_rows),
+            "pred_days": pred_days,
+            "pred_mean_close": round(float(np.mean(mean_np)), 4),
+            "pred_low_close": round(float(low[-1]), 4),
+            "pred_high_close": round(float(high[-1]), 4),
+        }
+        return result
+    except Exception as e:
+        return {"status": "error", "error": f"{type(e).__name__}: {e}", "symbol": symbol}
+# -----------------------------------------------------------------------------
+# TimesFM 2.5 (200M PyTorch)
+# -----------------------------------------------------------------------------
+TIMESFM_MODEL_ID = "google/timesfm-2.5-200m-pytorch"
+# Limits handed to compile(); chosen to cover the largest lookback (500) and
+# horizon (90) that _clamp lets through.
+TIMESFM_MAX_CONTEXT = 512
+TIMESFM_MAX_HORIZON = 128
+_timesfm = None
+def _get_timesfm():
+    """Return the shared, compiled TimesFM 2.5 model, loading it on first use.
+
+    ``timesfm`` is imported here rather than at module import time. The model
+    is cached in the module-level ``_timesfm`` only after ``compile`` succeeds,
+    so a failed setup is retried on the next call.
+    """
+    global _timesfm
+    if _timesfm is None:
+        import timesfm
+        # The 2.5 PyTorch class is published as ``TimesFM_2p5_200M_torch``; the
+        # earlier spelling is kept as a fallback for installs that expose it.
+        model_cls = getattr(timesfm, "TimesFM_2p5_200M_torch", None)
+        if model_cls is None:
+            model_cls = timesfm.TimesFm_2p5_200M_torch
+        model = model_cls.from_pretrained(TIMESFM_MODEL_ID)
+        # TimesFM 2.5 must be compiled with a ForecastConfig before forecast().
+        model.compile(
+            timesfm.ForecastConfig(
+                max_context=TIMESFM_MAX_CONTEXT,
+                max_horizon=TIMESFM_MAX_HORIZON,
+                normalize_inputs=True,
+            )
+        )
+        _timesfm = model
+    return _timesfm
+def forecast_timesfm(symbol: str, lookback_days: int = 180, pred_days: int = 30) -> dict:
+    """Forecast close prices for ``symbol`` with TimesFM 2.5 (200M).
+
+    Args:
+        symbol: yfinance ticker; surrounding whitespace is stripped and the
+            value is upper-cased.
+        lookback_days: history window, bounded by ``_clamp``.
+        pred_days: forecast horizon, bounded by ``_clamp``.
+
+    Returns:
+        On success a dict with status "ok", the last observed close, the point
+        forecast at the final step, the % change between them, a direction
+        flag, and mean/min/max of the point forecast.
+        On failure a dict with status "error" and a message; exceptions raised
+        while loading data or predicting are reported this way, not raised.
+    """
+    symbol = (symbol or "").strip().upper()
+    if not symbol:
+        return {"status": "error", "error": "empty symbol"}
+    lookback_days, pred_days = _clamp(lookback_days, pred_days)
+    try:
+        kdf = _load_ohlc(symbol, lookback_days)
+        if len(kdf) < 32:
+            return {"status": "error", "error": f"insufficient data for {symbol}", "n_lookback": len(kdf)}
+        model = _get_timesfm()
+        # The 2.5 forecast() takes only horizon and inputs; the frequency
+        # argument of the 1.x API no longer exists.
+        point, _q = model.forecast(
+            horizon=pred_days,
+            inputs=[kdf["close"].values.astype(np.float32)],
+        )
+        pred = np.asarray(point[0])
+        last_close = float(kdf["close"].iloc[-1])
+        pred_close = float(pred[-1])
+        pct = (pred_close - last_close) / last_close * 100.0
+        return {
+            "status": "ok", "symbol": symbol, "model": TIMESFM_MODEL_ID,
+            "last_close": round(last_close, 4), "predicted_close": round(pred_close, 4),
+            "pct_change": round(pct, 3), "direction": _direction(pct),
+            "n_lookback": int(len(kdf)), "pred_days": pred_days,
+            "pred_mean_close": round(float(np.mean(pred)), 4),
+            "pred_min_close": round(float(np.min(pred)), 4),
+            "pred_max_close": round(float(np.max(pred)), 4),
+        }
+    except Exception as e:
+        return {"status": "error", "error": f"{type(e).__name__}: {e}", "symbol": symbol}
+# -----------------------------------------------------------------------------
+# FinBERT — ProsusAI/finbert
+# -----------------------------------------------------------------------------
+FINBERT_MODEL_ID = "ProsusAI/finbert"
+_finbert = None
+def _get_finbert():
+    """Return the shared FinBERT text-classification pipeline, building it once.
+
+    ``transformers`` is imported here rather than at module import time. The
+    pipeline is created for FINBERT_MODEL_ID on CPU with ``top_k=None`` and
+    ``truncation=True`` and cached in the module-level ``_finbert``.
+    """
+    global _finbert
+    if _finbert is not None:
+        return _finbert
+    from transformers import pipeline
+    _finbert = pipeline(
+        "text-classification",
+        model=FINBERT_MODEL_ID,
+        device="cpu",
+        top_k=None,
+        truncation=True,
+    )
+    return _finbert
+def score_sentiment(texts_json: str) -> dict:
+    """Score texts with FinBERT and return per-text and averaged scores.
+
+    Args:
+        texts_json: a JSON array of strings, a single JSON value, or, when the
+            input is not valid JSON, newline-separated text (blank lines are
+            dropped). Only the first 50 texts are scored.
+
+    Returns:
+        On success a dict with status "ok", the number of texts ``n``, the
+        averaged ``pos``/``neg``/``neu`` scores, ``net`` (mean pos minus mean
+        neg) and ``per_text`` rows. On failure a dict with status "error".
+    """
+    import json as _json
+
+    if not texts_json or not str(texts_json).strip():
+        return {"status": "error", "error": "empty input"}
+
+    try:
+        parsed = _json.loads(texts_json)
+    except Exception:
+        # Not JSON: treat the input as one text per non-blank line.
+        parsed = [line.strip() for line in str(texts_json).split("\n") if line.strip()]
+    else:
+        if isinstance(parsed, str):
+            parsed = [parsed]
+        elif not isinstance(parsed, list):
+            parsed = [str(parsed)]
+
+    texts = [str(item) for item in parsed][:50]
+    if not texts:
+        return {"status": "error", "error": "no non-empty texts"}
+
+    try:
+        scored = _get_finbert()(texts)
+        totals = {"pos": 0.0, "neg": 0.0, "neu": 0.0}
+        per_text = []
+        for result in scored:
+            entries = result if isinstance(result, list) else [result]
+            row = {"pos": 0.0, "neg": 0.0, "neu": 0.0}
+            for entry in entries:
+                label = str(entry.get("label", "")).lower()
+                score = float(entry.get("score", 0))
+                # Labels are matched on their first three letters.
+                prefix = label[:3]
+                if prefix in row:
+                    row[prefix] = score
+            for key, value in row.items():
+                totals[key] += value
+            per_text.append({key: round(value, 4) for key, value in row.items()})
+        count = len(texts)
+        return {
+            "status": "ok",
+            "model": FINBERT_MODEL_ID,
+            "n": count,
+            "net": round((totals["pos"] - totals["neg"]) / count, 4),
+            "pos": round(totals["pos"] / count, 4),
+            "neg": round(totals["neg"] / count, 4),
+            "neu": round(totals["neu"] / count, 4),
+            "per_text": per_text,
+        }
+    except Exception as e:
+        return {"status": "error", "error": f"{type(e).__name__}: {e}"}
+# -----------------------------------------------------------------------------
+# Gradio UI — 4 tabs, each exposes a named API for MCP discovery
+# -----------------------------------------------------------------------------
+# Build the UI inside a Blocks context; `demo` is the object launched at the
+# bottom of the file. Each button's click handler is registered with an
+# explicit api_name.
+with gr.Blocks(title="Investment OS inference") as demo:
+    gr.Markdown("# Investment OS — 3 TSFMs + FinBERT\nCPU-only, all lazy-loaded. Endpoints: `/forecast`, `/forecast_chronos`, `/forecast_timesfm`, `/score_sentiment`.")
+    # Kronos tab: symbol, lookback (32-500) and horizon (1-90) inputs; the
+    # button calls forecast() and shows the returned dict in a JSON component.
+    with gr.Tab("Kronos"):
+        with gr.Row():
+            s1 = gr.Textbox(label="Symbol", value="VOO")
+            lb1 = gr.Slider(32, 500, value=180, step=1, label="Lookback days")
+            pd1 = gr.Slider(1, 90, value=30, step=1, label="Pred days")
+        gr.Button("Forecast").click(forecast, [s1, lb1, pd1], gr.JSON(), api_name="forecast")
+    # Chronos tab: same three inputs, wired to forecast_chronos().
+    with gr.Tab("Chronos-bolt-tiny"):
+        with gr.Row():
+            s2 = gr.Textbox(label="Symbol", value="VOO")
+            lb2 = gr.Slider(32, 500, value=180, step=1, label="Lookback days")
+            pd2 = gr.Slider(1, 90, value=30, step=1, label="Pred days")
+        gr.Button("Forecast").click(forecast_chronos, [s2, lb2, pd2], gr.JSON(), api_name="forecast_chronos")
+    # TimesFM tab: same three inputs, wired to forecast_timesfm().
+    with gr.Tab("TimesFM-2.5"):
+        with gr.Row():
+            s3 = gr.Textbox(label="Symbol", value="VOO")
+            lb3 = gr.Slider(32, 500, value=180, step=1, label="Lookback days")
+            pd3 = gr.Slider(1, 90, value=30, step=1, label="Pred days")
+        gr.Button("Forecast").click(forecast_timesfm, [s3, lb3, pd3], gr.JSON(), api_name="forecast_timesfm")
+    # FinBERT tab: one multi-line textbox whose raw string is passed to
+    # score_sentiment().
+    with gr.Tab("FinBERT"):
+        t4 = gr.Textbox(label="Texts (JSON array or newline-separated)",
+                        value='["Strong Q4 beats expectations","Margin pressure ahead"]', lines=6)
+        gr.Button("Score").click(score_sentiment, [t4], gr.JSON(), api_name="score_sentiment")
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)