forecasting

Build error

App Files Files Community

adamkahle committed on Dec 14, 2025

Commit

8c39017

verified ·

1 Parent(s): d3c8efa

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -181

app.py CHANGED Viewed

@@ -22,9 +22,6 @@ from statsforecast.models import (
 from utilsforecast.evaluation import evaluate
 from utilsforecast.losses import mae, mape, mase, mse, rmse, smape
-from sklearn.linear_model import Ridge as SkRidge
-from sklearn.linear_model import Lasso as SkLasso
 REQUIRED_COLS = ["unique_id", "ds", "y"]
 PLOT_TAIL_POINTS = 300
@@ -119,10 +116,67 @@ def align_future_x_to_horizon(df_train, X_future, xcols, h):
     return X_h.reset_index(drop=True)
 def create_future_plot(fcst_df, original_df, title="Forecast"):
-    plt.figure(figsize=(12, 7))
     if fcst_df is None or fcst_df.empty:
         return None
     forecast_cols = [c for c in fcst_df.columns if c not in ["unique_id", "ds"]]
     unique_ids = fcst_df["unique_id"].unique()
     colors = plt.cm.tab10.colors
@@ -160,6 +214,7 @@ def create_future_plot(fcst_df, original_df, title="Forecast"):
     fig = plt.gcf()
     fig.autofmt_xdate()
     return fig
@@ -178,110 +233,6 @@ def export_results(eval_df, future_df):
     return out
-def _normalize_timegpt_output(tgpt_raw, df_y, h):
-    tgpt = _ensure_pandas_df(tgpt_raw)
-    if tgpt is None or tgpt.empty:
-        raise ValueError("TimeGPT returned empty output.")
-    if "unique_id" not in tgpt.columns or "ds" not in tgpt.columns:
-        raise ValueError(f"TimeGPT output missing required columns. Got: {list(tgpt.columns)}")
-    tgpt["ds"] = pd.to_datetime(tgpt["ds"], errors="coerce")
-    if tgpt["ds"].isna().any():
-        raise ValueError("TimeGPT output has invalid ds values.")
-    out_col = None
-    for c in tgpt.columns:
-        if c.lower() in ("timegpt", "yhat", "y_hat", "forecast"):
-            out_col = c
-            break
-    if out_col is None:
-        non_id = [c for c in tgpt.columns if c not in ["unique_id", "ds"]]
-        if non_id:
-            out_col = non_id[0]
-    if out_col is None:
-        raise ValueError("TimeGPT output has no forecast column.")
-    if out_col != "timegpt":
-        tgpt = tgpt.rename(columns={out_col: "timegpt"})
-    last_ds = df_y.groupby("unique_id", as_index=False)["ds"].max().rename(columns={"ds": "last_ds"})
-    tgpt = tgpt.merge(last_ds, on="unique_id", how="inner")
-    tgpt = tgpt[tgpt["ds"] > tgpt["last_ds"]].copy()
-    tgpt = tgpt.sort_values(["unique_id", "ds"]).reset_index(drop=True)
-    tgpt = tgpt.groupby("unique_id", as_index=False).head(h).copy()
-    counts = tgpt.groupby("unique_id")["ds"].size()
-    missing_ids = sorted(set(last_ds["unique_id"]) - set(counts.index))
-    short_ids = sorted([uid for uid, c in counts.items() if c < h])
-    if missing_ids or short_ids:
-        parts = []
-        if missing_ids:
-            parts.append(f"Missing TimeGPT future rows for: {', '.join(missing_ids)}")
-        if short_ids:
-            parts.append(f"Not enough TimeGPT rows (need {h}) for: {', '.join(short_ids)}")
-        raise ValueError(" | ".join(parts))
-    tgpt = tgpt[["unique_id", "ds", "timegpt"]].copy()
-    return tgpt
-def _fit_predict_linear_recursive(df_y, X_future_h, xcols, h, model_kind, alpha, n_lags):
-    df_y = df_y.sort_values(["unique_id", "ds"]).reset_index(drop=True)
-    Xf = None
-    if xcols:
-        Xf = X_future_h.sort_values(["unique_id", "ds"]).reset_index(drop=True)
-    out_rows = []
-    for uid, g in df_y.groupby("unique_id"):
-        y_hist = g["y"].to_numpy(dtype=float)
-        if len(y_hist) <= n_lags + 5:
-            continue
-        if xcols:
-            xf_u = Xf[Xf["unique_id"] == uid].copy()
-            xf_u = xf_u.sort_values("ds")
-            if len(xf_u) != h:
-                raise ValueError(f"X_df horizon mismatch for {uid}. Expected {h}, got {len(xf_u)}")
-            x_future = xf_u[xcols].to_numpy(dtype=float)
-            ds_future = xf_u["ds"].to_numpy()
-        else:
-            ds_last = g["ds"].max()
-            ds_future = pd.date_range(ds_last + pd.Timedelta(days=1), periods=h, freq="D").to_numpy()
-            x_future = None
-        X_train = []
-        y_train = []
-        if xcols:
-            hist_exog = g[xcols].to_numpy(dtype=float)
-        for t in range(n_lags, len(y_hist)):
-            feats = []
-            feats.extend(y_hist[t - n_lags : t].tolist())
-            if xcols:
-                feats.extend(hist_exog[t].tolist())
-            X_train.append(feats)
-            y_train.append(y_hist[t])
-        X_train = np.asarray(X_train, dtype=float)
-        y_train = np.asarray(y_train, dtype=float)
-        if model_kind == "ridge":
-            mdl = SkRidge(alpha=float(alpha), random_state=0)
-        else:
-            mdl = SkLasso(alpha=float(alpha), random_state=0, max_iter=10000)
-        mdl.fit(X_train, y_train)
-        preds = []
-        y_buf = y_hist.copy()
-        for step in range(h):
-            feats = []
-            feats.extend(y_buf[-n_lags:].tolist())
-            if xcols:
-                feats.extend(x_future[step].tolist())
-            yhat = float(mdl.predict(np.asarray(feats, dtype=float).reshape(1, -1))[0])
-            preds.append(yhat)
-            y_buf = np.append(y_buf, yhat)
-        for i in range(h):
-            out_rows.append((uid, pd.to_datetime(ds_future[i]), preds[i]))
-    colname = model_kind
-    return pd.DataFrame(out_rows, columns=["unique_id", "ds", colname])
 def run_forecast(
     train_file,
     freq,
@@ -291,6 +242,7 @@ def run_forecast(
     cv_windows,
     future_h,
     loss_name,
     use_histavg,
     use_naive,
     use_snaive,
@@ -302,11 +254,6 @@ def run_forecast(
     nixtla_api_key,
     use_nf_lstm,
     use_tc_prophet,
-    use_ridge,
-    use_lasso,
-    ridge_alpha,
-    lasso_alpha,
-    linear_lags,
     xcols,
     future_x_file,
     last_eval,
@@ -338,7 +285,6 @@ def run_forecast(
         cv_step_size = _to_int(cv_step_size, "cv_step_size")
         cv_windows = _to_int(cv_windows, "cv_windows")
         future_h = _to_int(future_h, "future_h")
-        linear_lags = _to_int(linear_lags, "linear_lags")
         df, candidate_xcols, msg = load_training_data(train_file)
         if df is None:
@@ -348,7 +294,7 @@ def run_forecast(
         train_tail = _tail_history(df_y, n=PLOT_TAIL_POINTS)
         use_exog = bool(xcols)
-        needs_future_x = use_exog and (use_timegpt or use_nf_lstm or use_ridge or use_lasso)
         X_future_h = None
         df_exog = None
@@ -402,19 +348,22 @@ def run_forecast(
         future_parts = []
         if uni_models:
-            sf_u = StatsForecast(models=uni_models, freq=freq, n_jobs=-1)
-            cv_u = sf_u.cross_validation(df=df_y, h=cv_h, step_size=cv_step_size, n_windows=cv_windows)
-            ev_u = evaluate(cv_u, metrics=[loss_fn])
-            ev_u["pipeline"] = "univariate"
-            eval_parts.append(ev_u)
             fc_u = sf_u.forecast(df=df_y, h=future_h)
             future_parts.append(fc_u)
         if exog_models:
             if not use_exog or X_future_h is None or df_exog is None:
                 return keep("Exogenous model selected but predictors/X_df are missing.")
-            sf_x = StatsForecast(models=exog_models, freq=freq, n_jobs=-1)
             fc_x = sf_x.forecast(df=df_exog, h=future_h, X_df=X_future_h)
             future_parts.append(fc_x)
         if use_timegpt:
@@ -458,25 +407,23 @@ def run_forecast(
                     futr_exog_list = list(xcols)
                 model = LSTM(
                     h=future_h,
-                    max_steps=200,
-                    input_size=max(3 * future_h, 30),
-                    encoder_hidden_size=64,
-                    decoder_hidden_size=64,
-                    batch_size=32,
                     futr_exog_list=futr_exog_list,
                     alias="lstm",
                 )
                 nf = NeuralForecast(models=[model], freq=freq)
                 nf.fit(df=nf_df)
-                if use_exog:
-                    pred = nf.predict(futr_df=futr_df).reset_index(drop=False)
-                else:
-                    pred = nf.predict().reset_index(drop=False)
                 cols = [c for c in pred.columns if c not in ["unique_id", "ds"]]
                 if not cols:
                     return keep("LSTM produced no forecast columns.")
                 main_col = cols[0]
                 lstm_df = pred[["unique_id", "ds", main_col]].rename(columns={main_col: "lstm"})
                 future_parts.append(lstm_df)
             except Exception as e:
                 return keep(f"LSTM failed: {type(e).__name__}: {e}")
@@ -489,47 +436,11 @@ def run_forecast(
             try:
                 p = Prophet(alias="prophet")
                 prop = p.forecast(df=df_y, h=future_h, freq=freq)
-                prop = _ensure_pandas_df(prop)
-                if prop is None or prop.empty:
-                    return keep("Prophet returned empty output.")
-                if "prophet" not in prop.columns:
-                    non_id = [c for c in prop.columns if c not in ["unique_id", "ds"]]
-                    if not non_id:
-                        return keep(f"Prophet output missing forecast column. Got: {list(prop.columns)}")
-                    prop = prop.rename(columns={non_id[0]: "prophet"})
-                prop = prop[["unique_id", "ds", "prophet"]]
                 future_parts.append(prop)
             except Exception as e:
                 return keep(f"Prophet failed: {type(e).__name__}: {e}")
-        if use_ridge:
-            if use_exog and X_future_h is None:
-                return keep("Ridge selected with predictors but X_df is missing/invalid.")
-            ridge_df = _fit_predict_linear_recursive(
-                df_y=df_y,
-                X_future_h=X_future_h,
-                xcols=list(xcols) if use_exog else [],
-                h=future_h,
-                model_kind="ridge",
-                alpha=float(ridge_alpha),
-                n_lags=linear_lags,
-            )
-            future_parts.append(ridge_df)
-        if use_lasso:
-            if use_exog and X_future_h is None:
-                return keep("Lasso selected with predictors but X_df is missing/invalid.")
-            lasso_df = _fit_predict_linear_recursive(
-                df_y=df_y,
-                X_future_h=X_future_h,
-                xcols=list(xcols) if use_exog else [],
-                h=future_h,
-                model_kind="lasso",
-                alpha=float(lasso_alpha),
-                n_lags=linear_lags,
-            )
-            future_parts.append(lasso_df)
         if not future_parts and not eval_parts:
             return keep("No models selected.")
@@ -569,7 +480,7 @@ def on_train_upload(file_obj):
     return msg, gr.update(choices=xchoices, value=[])
-with gr.Blocks(title="StatsForecast + TimeGPT + Extra Models") as demo:
     gr.Markdown(
         """
 # Forecasting Demo
@@ -595,6 +506,7 @@ Optional predictors: extra columns in training CSV + X_df for horizon.
             cv_windows = gr.Number(value=3, label="CV windows", precision=0)
         future_h = gr.Number(value=30, label="Future forecast horizon", precision=0)
         loss_name = gr.Dropdown(choices=["rmse", "mae", "mse", "mape", "smape", "mase"], value="rmse", label="Metric")
     with gr.Accordion("StatsForecast models", open=True):
         with gr.Row():
@@ -612,15 +524,8 @@ Optional predictors: extra columns in training CSV + X_df for horizon.
     with gr.Accordion("Additional models", open=True):
         with gr.Row():
-            use_nf_lstm = gr.Checkbox(value=False, label="Nixtla NeuralForecast LSTM")
             use_tc_prophet = gr.Checkbox(value=False, label="TimeCopilot Prophet")
-        with gr.Row():
-            use_ridge = gr.Checkbox(value=False, label="Ridge (sklearn)")
-            use_lasso = gr.Checkbox(value=False, label="Lasso (sklearn)")
-        with gr.Row():
-            ridge_alpha = gr.Number(value=1.0, label="Ridge alpha", precision=6)
-            lasso_alpha = gr.Number(value=0.001, label="Lasso alpha", precision=6)
-            linear_lags = gr.Number(value=14, label="Linear model lags", precision=0)
     last_eval = gr.State(None)
     last_future = gr.State(None)
@@ -647,6 +552,7 @@ Optional predictors: extra columns in training CSV + X_df for horizon.
             cv_windows,
             future_h,
             loss_name,
             use_histavg,
             use_naive,
             use_snaive,
@@ -658,11 +564,6 @@ Optional predictors: extra columns in training CSV + X_df for horizon.
             nixtla_api_key,
             use_nf_lstm,
             use_tc_prophet,
-            use_ridge,
-            use_lasso,
-            ridge_alpha,
-            lasso_alpha,
-            linear_lags,
             xcols,
             future_x_file,
             last_eval,

 from utilsforecast.evaluation import evaluate
 from utilsforecast.losses import mae, mape, mase, mse, rmse, smape
 REQUIRED_COLS = ["unique_id", "ds", "y"]
 PLOT_TAIL_POINTS = 300
     return X_h.reset_index(drop=True)
+def _normalize_timegpt_output(tgpt_raw, df_y, h):
+    """Reduce raw TimeGPT output to ``unique_id`` / ``ds`` / ``timegpt`` columns.
+
+    The forecast column is located by name (case-insensitive match against
+    ``timegpt``, ``yhat``, ``y_hat``, ``forecast``), falling back to the first
+    column that is not ``unique_id`` or ``ds``, and is renamed to ``timegpt``.
+    Rows are then restricted to timestamps strictly after each series' last
+    ``ds`` in ``df_y`` and truncated to the first ``h`` rows per series.
+
+    Args:
+        tgpt_raw: Raw TimeGPT result; passed through ``_ensure_pandas_df``.
+        df_y: Training frame with ``unique_id`` and ``ds`` columns, used to
+            find the last observed timestamp of every series.
+        h: Number of future rows required for each series.
+
+    Returns:
+        DataFrame with exactly the columns ``unique_id``, ``ds``, ``timegpt``,
+        sorted by ``unique_id`` then ``ds``.
+
+    Raises:
+        ValueError: If the output is empty, lacks ``unique_id``/``ds``, has
+            unparseable ``ds`` values, has no forecast column, or does not
+            provide ``h`` future rows for every series in ``df_y``.
+    """
+    tgpt = _ensure_pandas_df(tgpt_raw)
+    if tgpt is None or tgpt.empty:
+        raise ValueError("TimeGPT returned empty output.")
+    if "unique_id" not in tgpt.columns or "ds" not in tgpt.columns:
+        raise ValueError(f"TimeGPT output missing required columns. Got: {list(tgpt.columns)}")
+    # assign() builds a new frame, so the caller's object is left untouched
+    # even if the helper above handed back its argument unchanged.
+    tgpt = tgpt.assign(ds=pd.to_datetime(tgpt["ds"], errors="coerce"))
+    if tgpt["ds"].isna().any():
+        raise ValueError("TimeGPT output has invalid ds values.")
+    known_names = ("timegpt", "yhat", "y_hat", "forecast")
+    # str() guards against non-string column labels, which have no .lower().
+    out_col = next((c for c in tgpt.columns if str(c).lower() in known_names), None)
+    if out_col is None:
+        out_col = next((c for c in tgpt.columns if c not in ("unique_id", "ds")), None)
+    if out_col is None:
+        raise ValueError("TimeGPT output has no forecast column.")
+    if out_col != "timegpt":
+        tgpt = tgpt.rename(columns={out_col: "timegpt"})
+    last_ds = df_y.groupby("unique_id", as_index=False)["ds"].max().rename(columns={"ds": "last_ds"})
+    # Inner merge drops series that are absent from the training data.
+    tgpt = tgpt.merge(last_ds, on="unique_id", how="inner")
+    tgpt = tgpt[tgpt["ds"] > tgpt["last_ds"]].copy()
+    tgpt = tgpt.sort_values(["unique_id", "ds"]).reset_index(drop=True)
+    tgpt = tgpt.groupby("unique_id", as_index=False).head(h).copy()
+    counts = tgpt.groupby("unique_id")["ds"].size()
+    missing_ids = sorted(set(last_ds["unique_id"]) - set(counts.index))
+    short_ids = sorted(uid for uid, c in counts.items() if c < h)
+    if missing_ids or short_ids:
+        parts = []
+        # Ids are stringified before joining: str.join raises TypeError on
+        # non-string ids (e.g. integers), which would hide the real problem.
+        if missing_ids:
+            parts.append(f"Missing TimeGPT future rows for: {', '.join(map(str, missing_ids))}")
+        if short_ids:
+            parts.append(f"Not enough TimeGPT rows (need {h}) for: {', '.join(map(str, short_ids))}")
+        raise ValueError(" | ".join(parts))
+    tgpt = tgpt[["unique_id", "ds", "timegpt"]].copy()
+    return tgpt
+def _ensure_forecast_cols(df_fc, colname):
+    """Reduce a forecast frame to ``unique_id`` / ``ds`` / ``colname``.
+
+    If ``colname`` is not already a column, the first column other than
+    ``unique_id`` and ``ds`` is renamed to ``colname``; any further forecast
+    columns are dropped by the final column selection.
+
+    Args:
+        df_fc: Forecast output; passed through ``_ensure_pandas_df``.
+        colname: Name the forecast column must carry in the result.
+
+    Returns:
+        A copy with columns ``unique_id``, ``ds`` (converted with
+        ``pd.to_datetime``) and ``colname``, or ``None`` when the input is
+        ``None`` or empty.
+
+    Raises:
+        ValueError: If the frame has no column besides ``unique_id``/``ds``.
+    """
+    if df_fc is None:
+        return None
+    # Convert before touching `.empty`: the raw object is not guaranteed to
+    # be a pandas frame, and only the converted one is known to expose it.
+    df_fc = _ensure_pandas_df(df_fc)
+    if df_fc is None or df_fc.empty:
+        return None
+    if colname not in df_fc.columns:
+        non_id = [c for c in df_fc.columns if c not in ["unique_id", "ds"]]
+        if not non_id:
+            raise ValueError(f"Forecast output missing forecast column. Got: {list(df_fc.columns)}")
+        df_fc = df_fc.rename(columns={non_id[0]: colname})
+    df_fc = df_fc[["unique_id", "ds", colname]].copy()
+    df_fc["ds"] = pd.to_datetime(df_fc["ds"])
+    return df_fc
 def create_future_plot(fcst_df, original_df, title="Forecast"):
     if fcst_df is None or fcst_df.empty:
         return None
+    plt.figure(figsize=(12, 7))
     forecast_cols = [c for c in fcst_df.columns if c not in ["unique_id", "ds"]]
     unique_ids = fcst_df["unique_id"].unique()
     colors = plt.cm.tab10.colors
     fig = plt.gcf()
     fig.autofmt_xdate()
+    plt.close(fig)
     return fig
     return out
 def run_forecast(
     train_file,
     freq,
     cv_windows,
     future_h,
     loss_name,
+    run_cv,
     use_histavg,
     use_naive,
     use_snaive,
     nixtla_api_key,
     use_nf_lstm,
     use_tc_prophet,
     xcols,
     future_x_file,
     last_eval,
         cv_step_size = _to_int(cv_step_size, "cv_step_size")
         cv_windows = _to_int(cv_windows, "cv_windows")
         future_h = _to_int(future_h, "future_h")
         df, candidate_xcols, msg = load_training_data(train_file)
         if df is None:
         train_tail = _tail_history(df_y, n=PLOT_TAIL_POINTS)
         use_exog = bool(xcols)
+        needs_future_x = use_exog and (use_timegpt or use_nf_lstm or use_autoarima)
         X_future_h = None
         df_exog = None
         future_parts = []
         if uni_models:
+            sf_u = StatsForecast(models=uni_models, freq=freq, n_jobs=1)
+            if run_cv:
+                cv_u = sf_u.cross_validation(df=df_y, h=cv_h, step_size=cv_step_size, n_windows=cv_windows)
+                ev_u = evaluate(cv_u, metrics=[loss_fn])
+                ev_u["pipeline"] = "univariate"
+                eval_parts.append(ev_u)
             fc_u = sf_u.forecast(df=df_y, h=future_h)
+            fc_u = _ensure_forecast_cols(fc_u, "statsforecast")
             future_parts.append(fc_u)
         if exog_models:
             if not use_exog or X_future_h is None or df_exog is None:
                 return keep("Exogenous model selected but predictors/X_df are missing.")
+            sf_x = StatsForecast(models=exog_models, freq=freq, n_jobs=1)
             fc_x = sf_x.forecast(df=df_exog, h=future_h, X_df=X_future_h)
+            fc_x = _ensure_forecast_cols(fc_x, "autoarima")
             future_parts.append(fc_x)
         if use_timegpt:
                     futr_exog_list = list(xcols)
                 model = LSTM(
                     h=future_h,
+                    max_steps=80,
+                    input_size=max(2 * future_h, 30),
+                    encoder_hidden_size=32,
+                    decoder_hidden_size=32,
+                    batch_size=16,
                     futr_exog_list=futr_exog_list,
                     alias="lstm",
                 )
                 nf = NeuralForecast(models=[model], freq=freq)
                 nf.fit(df=nf_df)
+                pred = nf.predict(futr_df=futr_df).reset_index(drop=False) if use_exog else nf.predict().reset_index(drop=False)
                 cols = [c for c in pred.columns if c not in ["unique_id", "ds"]]
                 if not cols:
                     return keep("LSTM produced no forecast columns.")
                 main_col = cols[0]
                 lstm_df = pred[["unique_id", "ds", main_col]].rename(columns={main_col: "lstm"})
+                lstm_df["ds"] = pd.to_datetime(lstm_df["ds"])
                 future_parts.append(lstm_df)
             except Exception as e:
                 return keep(f"LSTM failed: {type(e).__name__}: {e}")
             try:
                 p = Prophet(alias="prophet")
                 prop = p.forecast(df=df_y, h=future_h, freq=freq)
+                prop = _ensure_forecast_cols(prop, "prophet")
                 future_parts.append(prop)
             except Exception as e:
                 return keep(f"Prophet failed: {type(e).__name__}: {e}")
         if not future_parts and not eval_parts:
             return keep("No models selected.")
     return msg, gr.update(choices=xchoices, value=[])
+with gr.Blocks(title="Forecasting Demo") as demo:
     gr.Markdown(
         """
 # Forecasting Demo
             cv_windows = gr.Number(value=3, label="CV windows", precision=0)
         future_h = gr.Number(value=30, label="Future forecast horizon", precision=0)
         loss_name = gr.Dropdown(choices=["rmse", "mae", "mse", "mape", "smape", "mase"], value="rmse", label="Metric")
+        run_cv = gr.Checkbox(value=False, label="Run cross-validation (slower)")
     with gr.Accordion("StatsForecast models", open=True):
         with gr.Row():
     with gr.Accordion("Additional models", open=True):
         with gr.Row():
+            use_nf_lstm = gr.Checkbox(value=False, label="NeuralForecast LSTM")
             use_tc_prophet = gr.Checkbox(value=False, label="TimeCopilot Prophet")
     last_eval = gr.State(None)
     last_future = gr.State(None)
             cv_windows,
             future_h,
             loss_name,
+            run_cv,
             use_histavg,
             use_naive,
             use_snaive,
             nixtla_api_key,
             use_nf_lstm,
             use_tc_prophet,
             xcols,
             future_x_file,
             last_eval,