Spaces:

Minyans
/

kronos-forecast

Running

App Files Files Community

Minyans committed on Apr 14

Commit

f244203

1 Parent(s): 202b84c

Make device and @spaces.GPU decorator conditional — supports both ZeroGPU and CPU-basic tiers

Browse files

Files changed (1) hide show

app.py +23 -6

app.py CHANGED Viewed

@@ -31,12 +31,28 @@ if not hasattr(_hfh, 'HfFolder'):
 # ─────────────────────────────────────────────────────────────────────────────
-import spaces
 import gradio as gr
 import json
 import os
 import pandas as pd
 # Kronos model source is in ./model/ (copied from repo)
 sys.path.insert(0, os.path.dirname(__file__))
 from model import Kronos, KronosTokenizer, KronosPredictor
@@ -46,14 +62,14 @@ MODEL_ID     = "NeoQuasar/Kronos-mini"
 LOOKBACK     = 800
 PRED_LEN     = 3
-# Load weights at startup on CPU — @spaces.GPU moves computation to CUDA at call time
 print("Loading Kronos-mini weights...", flush=True)
 tokenizer = KronosTokenizer.from_pretrained(TOKENIZER_ID)
 model     = Kronos.from_pretrained(MODEL_ID)
 print("Weights loaded.", flush=True)
-@spaces.GPU
 def forecast(payload_json: str) -> str:
     """
     Input:  JSON string — list of:
@@ -63,7 +79,7 @@ def forecast(payload_json: str) -> str:
               { "symbol": "MSFT.US",
                 "forecast": [{"open":..,"high":..,"low":..,"close":..,"date":"YYYY-MM-DD"}, ...] }
     """
-    predictor = KronosPredictor(model, tokenizer, device="cuda", max_context=2048, clip=5)
     payload   = json.loads(payload_json)
     symbols, df_list, x_ts_list, y_ts_list = [], [], [], []
@@ -98,7 +114,7 @@ def forecast(payload_json: str) -> str:
         T=0.6,
         top_k=0,
         top_p=0.9,
-        sample_count=5,   # GPU is fast — use full 5 samples
         verbose=False
     )
@@ -123,7 +139,8 @@ demo = gr.Interface(
         "**Kronos-mini** (NeoQuasar/Kronos-mini) — AAAI 2026 autoregressive transformer "
         "trained on 12B+ K-line records from 45 global exchanges.\n\n"
         "POST OHLCV data for up to 6 symbols → receive 3-day OHLC predictions.\n"
-        f"Settings: lookback={LOOKBACK} bars | pred_len={PRED_LEN} | T=0.6 | n=5 samples"
     ),
     flagging_mode="never",
     api_name="predict"

 # ─────────────────────────────────────────────────────────────────────────────
 import gradio as gr
 import json
 import os
+import torch
 import pandas as pd
+# ── ZeroGPU / CPU-basic compatibility ────────────────────────────────────────
+# On ZeroGPU spaces, `spaces` is importable and @spaces.GPU allocates an H200.
+# On CPU-basic spaces, the `spaces` package is absent — we fall back gracefully.
+try:
+    import spaces
+    _GPU_AVAILABLE = True
+except ImportError:
+    _GPU_AVAILABLE = False
+def _gpu_if_available(fn):
+    """Decorator: use @spaces.GPU on ZeroGPU, identity on CPU-basic."""
+    return spaces.GPU(fn) if _GPU_AVAILABLE else fn
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Running on device: {DEVICE}", flush=True)
 # Kronos model source is in ./model/ (copied from repo)
 sys.path.insert(0, os.path.dirname(__file__))
 from model import Kronos, KronosTokenizer, KronosPredictor
 LOOKBACK     = 800
 PRED_LEN     = 3
+# Load weights at startup (CPU) — moved to GPU at call time if ZeroGPU
 print("Loading Kronos-mini weights...", flush=True)
 tokenizer = KronosTokenizer.from_pretrained(TOKENIZER_ID)
 model     = Kronos.from_pretrained(MODEL_ID)
 print("Weights loaded.", flush=True)
+@_gpu_if_available
 def forecast(payload_json: str) -> str:
     """
     Input:  JSON string — list of:
               { "symbol": "MSFT.US",
                 "forecast": [{"open":..,"high":..,"low":..,"close":..,"date":"YYYY-MM-DD"}, ...] }
     """
+    predictor = KronosPredictor(model, tokenizer, device=DEVICE, max_context=2048, clip=5)
     payload   = json.loads(payload_json)
     symbols, df_list, x_ts_list, y_ts_list = [], [], [], []
         T=0.6,
         top_k=0,
         top_p=0.9,
+        sample_count=3 if DEVICE == "cpu" else 5,  # 3 on CPU to keep latency reasonable
         verbose=False
     )
         "**Kronos-mini** (NeoQuasar/Kronos-mini) — AAAI 2026 autoregressive transformer "
         "trained on 12B+ K-line records from 45 global exchanges.\n\n"
         "POST OHLCV data for up to 6 symbols → receive 3-day OHLC predictions.\n"
+        f"Settings: lookback={LOOKBACK} bars | pred_len={PRED_LEN} | T=0.6 | "
+        f"device={DEVICE} | n={'5' if DEVICE == 'cuda' else '3'} samples"
     ),
     flagging_mode="never",
     api_name="predict"