Spaces:

Gilette
/

volatilitypredictor

Sleeping

App Files Community

Gil Stetler committed on Oct 25, 2025

Commit

67dad62

1 Parent(s): 6a68c5b

test on previous data with mse

Browse files

Files changed (1) hide show

app.py +48 -30

app.py CHANGED Viewed

@@ -1,6 +1,4 @@
 # app.py
-import os
-import io
 import numpy as np
 import pandas as pd
 import torch
@@ -12,68 +10,88 @@ from chronos import ChronosPipeline
 MODEL_ID = "amazon/chronos-t5-large"
 PREDICTION_LENGTH = 12
-NUM_SAMPLES = 100  # adjust if you want tighter/faster
-# Choose a sensible dtype/device for Space hardware
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16 if device == "cuda" else torch.float32
-# Load once at startup (HF Spaces cache model weights between runs)
 pipe = ChronosPipeline.from_pretrained(
     MODEL_ID,
-    device_map="auto",          # uses GPU if available
     torch_dtype=dtype,
 )
-# Small helper to run the full demo pipeline
-def run_forecast():
-    # 1) Load example data (univariate)
     df = pd.read_csv(
         "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv"
     )
     y = df["#Passengers"].astype(float).to_numpy()
-    # 2) Forecast with Chronos
-    context = torch.tensor(y, dtype=torch.float32)
     fcst = pipe.predict(context, prediction_length=PREDICTION_LENGTH, num_samples=NUM_SAMPLES)  # [1, S, H]
     samples = fcst[0].cpu().numpy()  # (S, H)
-    # 3) Summaries
     low, median, high = np.quantile(samples, [0.1, 0.5, 0.9], axis=0)
-    # 4) Plot history + forecast
-    fig = plt.figure(figsize=(8, 4))
-    x_hist = np.arange(len(y))
-    x_fcst = np.arange(len(y), len(y) + PREDICTION_LENGTH)
-    plt.plot(x_hist, y, label="history")
-    plt.plot(x_fcst, median, label="median")
     plt.fill_between(x_fcst, low, high, alpha=0.3, label="80% interval")
-    plt.title("Chronos-T5-Large Forecast")
     plt.xlabel("time")
     plt.ylabel("#Passengers")
-    plt.legend()
     plt.tight_layout()
-    # Also return the raw curves if you want to inspect/download
     out_json = {
-        "prediction_length": PREDICTION_LENGTH,
         "num_samples": int(NUM_SAMPLES),
         "median": median.tolist(),
         "p10": low.tolist(),
         "p90": high.tolist(),
     }
-    return fig, out_json
-with gr.Blocks(title="Chronos-T5-Large • AirPassengers Demo") as demo:
     gr.Markdown(
-        "## Chronos-T5-Large (zero-shot forecasting)\n"
-        "Click **Run forecast** to compute on the server (CPU/GPU of this Space)."
     )
-    run_btn = gr.Button("Run forecast", variant="primary")
-    plot = gr.Plot(label="Forecast")
-    meta = gr.JSON(label="Forecast summary (median, p10, p90)")
-    run_btn.click(fn=run_forecast, inputs=None, outputs=[plot, meta])
 if __name__ == "__main__":
     demo.launch()

 # app.py
 import numpy as np
 import pandas as pd
 import torch
 MODEL_ID = "amazon/chronos-t5-large"
 PREDICTION_LENGTH = 12
+NUM_SAMPLES = 100  # increase for smoother quantiles (slower)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16 if device == "cuda" else torch.float32
+# Load once at startup (HF Spaces cache between runs)
 pipe = ChronosPipeline.from_pretrained(
     MODEL_ID,
+    device_map="auto",
     torch_dtype=dtype,
 )
+def run_forecast_and_evaluate():
+    # 1) Load univariate example data
     df = pd.read_csv(
         "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv"
     )
     y = df["#Passengers"].astype(float).to_numpy()
+    n = len(y)
+    if n <= PREDICTION_LENGTH + 5:
+        raise gr.Error("Time series too short for a holdout evaluation.")
+    # 2) Train/forecast split:
+    #    Use all but the last PREDICTION_LENGTH points as context (train),
+    #    and compare forecast to the real last PREDICTION_LENGTH points (test).
+    y_train = y[: n - PREDICTION_LENGTH]
+    y_test = y[n - PREDICTION_LENGTH :]
+    context = torch.tensor(y_train, dtype=torch.float32)
     fcst = pipe.predict(context, prediction_length=PREDICTION_LENGTH, num_samples=NUM_SAMPLES)  # [1, S, H]
     samples = fcst[0].cpu().numpy()  # (S, H)
+    # 3) Summaries & metrics
     low, median, high = np.quantile(samples, [0.1, 0.5, 0.9], axis=0)
+    # "mean standard error" is ambiguous; commonly MSE + RMSE are reported:
+    mse = float(np.mean((median - y_test) ** 2))
+    rmse = float(np.sqrt(mse))
+    # 4) Plot: full history + forecast horizon vs ground truth
+    fig = plt.figure(figsize=(9, 4))
+    x_hist = np.arange(len(y_train))
+    x_fcst = np.arange(len(y_train), len(y_train) + PREDICTION_LENGTH)
+    plt.plot(x_hist, y_train, label="history")
+    plt.plot(x_fcst, y_test, label="actual (holdout)")
+    plt.plot(x_fcst, median, linestyle="--", label="forecast (median)")
     plt.fill_between(x_fcst, low, high, alpha=0.3, label="80% interval")
+    plt.title("Chronos-T5-Large • Holdout Evaluation")
     plt.xlabel("time")
     plt.ylabel("#Passengers")
+    plt.legend(loc="best")
     plt.tight_layout()
+    # JSON payload for inspection/download
     out_json = {
+        "prediction_length": int(PREDICTION_LENGTH),
         "num_samples": int(NUM_SAMPLES),
+        "metrics": {"MSE": mse, "RMSE": rmse},
         "median": median.tolist(),
         "p10": low.tolist(),
         "p90": high.tolist(),
+        "actual": y_test.tolist(),
     }
+    # Metrics text to display prominently
+    metrics_md = f"**MSE:** {mse:.3f}  **RMSE:** {rmse:.3f}"
+    return fig, out_json, metrics_md
+with gr.Blocks(title="Chronos-T5-Large • Holdout Demo") as demo:
     gr.Markdown(
+        "## Chronos-T5-Large (zero-shot forecasting) — Holdout Evaluation\n"
+        "Click **Run** to forecast the last 12 months from AirPassengers and compare to the true values.\n"
+        "Computation runs on this Space's server hardware."
     )
+    run_btn = gr.Button("Run", variant="primary")
+    plot = gr.Plot(label="Forecast vs Actual (holdout)")
+    meta = gr.JSON(label="Data & Metrics")
+    metrics = gr.Markdown(label="Metrics")
+    run_btn.click(run_forecast_and_evaluate, inputs=None, outputs=[plot, meta, metrics])
 if __name__ == "__main__":
     demo.launch()