""" Decomposition Explorer Interactive tool for exploring time-series decomposition methods. Part of ISA 444: Business Forecasting at Miami University (Spring 2026). Deployed to HuggingFace Spaces as fmegahed/decomposition-explorer. """ import io import warnings import gradio as gr import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np import pandas as pd from statsmodels.tsa.seasonal import STL, seasonal_decompose # --------------------------------------------------------------------------- # Color palette # --------------------------------------------------------------------------- CLR_PRIMARY = "#84d6d3" # teal CLR_ACCENT = "#C3142D" # Miami red CLR_TREND = "#C3142D" CLR_SEASON = "#84d6d3" CLR_RESID = "#666666" # --------------------------------------------------------------------------- # Built-in datasets # --------------------------------------------------------------------------- def _airline_passengers() -> pd.DataFrame: """Classic Box-Jenkins airline passengers (1949-1960, monthly).""" try: from statsmodels.datasets import co2 # noqa: F401 import statsmodels.api as sm data = sm.datasets.get_rdataset("AirPassengers", "datasets").data dates = pd.date_range(start="1949-01-01", periods=len(data), freq="MS") # Prefer common value column names from Rdatasets candidate_cols = [c for c in ["value", "passengers", "x"] if c in data.columns] if candidate_cols: y = pd.to_numeric(data[candidate_cols[0]], errors="coerce").to_numpy() else: # Fallback: take the last numeric column (and avoid obvious time columns) numeric_cols = data.select_dtypes(include=["number"]).columns.tolist() drop_cols = [c for c in ["time", "date", "year", "month"] if c in numeric_cols] numeric_cols = [c for c in numeric_cols if c not in drop_cols] if not numeric_cols: raise ValueError(f"Could not identify value column in AirPassengers data: {list(data.columns)}") y = pd.to_numeric(data[numeric_cols[-1]], errors="coerce").to_numpy() return pd.DataFrame({"ds": dates, "y": y}) except Exception: # Fallback: generate the well-known series manually np.random.seed(0) dates = pd.date_range("1949-01-01", "1960-12-01", freq="MS") n = len(dates) t = np.arange(n) trend = 110 + 2.5 * t seasonal_pattern = np.array( [-24, -20, 2, -1, -5, 30, 47, 46, 14, -10, -25, -26] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 6, n) y = trend + season * (1 + 0.02 * t) + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) def _us_retail_employment() -> pd.DataFrame: """Realistic synthetic monthly US retail employment (2000-2024).""" np.random.seed(42) dates = pd.date_range("2000-01-01", "2024-12-01", freq="MS") n = len(dates) t = np.arange(n) # Trend: upward with dips around 2008-09 and 2020 trend = 15_000 + 12 * t # 2008-2009 recession dip recession_08 = -1400 * np.exp(-0.5 * ((t - 108) / 8) ** 2) # 2020 COVID dip covid_20 = -2800 * np.exp(-0.5 * ((t - 243) / 3) ** 2) trend = trend + recession_08 + covid_20 # Seasonal pattern (retail peaks in Nov-Dec) seasonal_pattern = np.array( [-200, -350, -100, 50, 100, 150, 100, 80, -50, -100, 250, 500] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 60, n) y = trend + season + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) def _ohio_nonfarm() -> pd.DataFrame: """Realistic synthetic monthly Ohio nonfarm employment (2010-2024).""" np.random.seed(7) dates = pd.date_range("2010-01-01", "2024-12-01", freq="MS") n = len(dates) t = np.arange(n) trend = 5_100 + 4.5 * t # COVID dip covid = -650 * np.exp(-0.5 * ((t - 123) / 3) ** 2) trend = trend + covid seasonal_pattern = np.array( [-80, -50, 30, 50, 70, 60, 20, 10, 30, 20, -30, -60] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 25, n) y = trend + season + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) BUILTIN_DATASETS = { "Airline Passengers": _airline_passengers, "US Retail Employment": _us_retail_employment, "Ohio Nonfarm Employment": _ohio_nonfarm, } # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _load_dataset(name: str, csv_file) -> pd.DataFrame: """Return a DataFrame with columns ds (datetime) and y (float).""" if csv_file is not None: try: raw = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file) if "ds" not in raw.columns or "y" not in raw.columns: raise ValueError("CSV must contain columns 'ds' and 'y'.") raw["ds"] = pd.to_datetime(raw["ds"]) raw["y"] = pd.to_numeric(raw["y"], errors="coerce") raw = raw.dropna(subset=["y"]).sort_values("ds").reset_index(drop=True) return raw except Exception as exc: raise gr.Error(f"Could not read uploaded CSV: {exc}") if name in BUILTIN_DATASETS: return BUILTIN_DATASETS[name]() raise gr.Error(f"Unknown dataset: {name}") def _ensure_odd(val: int) -> int: """Force a value to be odd (required by statsmodels windows).""" val = int(val) return val if val % 2 == 1 else val + 1 def _strength(residual: np.ndarray, component_plus_residual: np.ndarray) -> float: """Compute strength of a component: max(0, 1 - Var(R)/Var(C+R)).""" var_r = np.nanvar(residual) var_cr = np.nanvar(component_plus_residual) if var_cr == 0: return 0.0 return float(max(0.0, 1.0 - var_r / var_cr)) # --------------------------------------------------------------------------- # Core decomposition + plotting # --------------------------------------------------------------------------- def decompose_and_plot( dataset_name: str, csv_file, method: str, period: int, stl_seasonal: int, stl_trend: int, stl_robust: bool, ): """Run decomposition and return (matplotlib Figure, summary string).""" # --- Load data -------------------------------------------------------- df = _load_dataset(dataset_name, csv_file) if len(df) < 2 * period: raise gr.Error( f"Not enough observations ({len(df)}) for the chosen period ({period}). " f"Need at least {2 * period} observations." ) y_series = pd.Series(df["y"].values, index=df["ds"]) # --- Decompose -------------------------------------------------------- with warnings.catch_warnings(): warnings.simplefilter("ignore") if method == "STL": stl_seasonal = _ensure_odd(stl_seasonal) stl_trend_val = _ensure_odd(stl_trend) if stl_trend > 0 else None stl_obj = STL( y_series, period=int(period), seasonal=stl_seasonal, trend=stl_trend_val, robust=bool(stl_robust), ) result = stl_obj.fit() else: model_type = "additive" if "Additive" in method else "multiplicative" result = seasonal_decompose( y_series, model=model_type, period=int(period) ) observed = result.observed trend = result.trend seasonal = result.seasonal resid = result.resid # --- Strength measures ------------------------------------------------ r = resid.values t = trend.values s = seasonal.values mask = ~(np.isnan(r) | np.isnan(t) | np.isnan(s)) r_clean = r[mask] t_clean = t[mask] s_clean = s[mask] f_trend = _strength(r_clean, t_clean + r_clean) f_season = _strength(r_clean, s_clean + r_clean) # --- Plot ------------------------------------------------------------- fig, axes = plt.subplots(4, 1, figsize=(10, 8), sharex=True) fig.patch.set_facecolor("white") for ax in axes: ax.set_facecolor("white") ax.grid(True, linewidth=0.3, alpha=0.5) dates = observed.index # 1. Observed axes[0].plot(dates, observed, color=CLR_PRIMARY, linewidth=1.2) axes[0].set_ylabel("Observed", fontsize=10, fontweight="bold") # 2. Trend axes[1].plot(dates, trend, color=CLR_TREND, linewidth=1.4) axes[1].set_ylabel("Trend", fontsize=10, fontweight="bold") # 3. Seasonal axes[2].plot(dates, seasonal, color=CLR_SEASON, linewidth=1.0) axes[2].set_ylabel("Seasonal", fontsize=10, fontweight="bold") # 4. Residual axes[3].plot(dates, resid, color=CLR_RESID, linewidth=0.8, alpha=0.8) axes[3].set_ylabel("Remainder", fontsize=10, fontweight="bold") axes[3].set_xlabel("Date", fontsize=10) method_label = method if method == "STL" else method.replace("Classical ", "Classical – ") fig.suptitle( f"Decomposition · {method_label} · period = {period}", fontsize=13, fontweight="bold", y=0.98, ) fig.tight_layout(rect=[0, 0, 1, 0.96]) # --- Summary text ----------------------------------------------------- summary = ( f"Strength of Trend (F_T): {f_trend:.4f}\n" f"Strength of Seasonality (F_S): {f_season:.4f}\n\n" f"Formulas:\n" f" F_T = max(0, 1 − Var(R) / Var(T + R))\n" f" F_S = max(0, 1 − Var(R) / Var(S + R))" ) return fig, summary # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- _THEME = gr.themes.Soft( primary_hue=gr.themes.Color( c50="#fef2f3", c100="#fde6e8", c200="#fbd0d5", c300="#f7a4ae", c400="#f17182", c500="#C3142D", c600="#b01228", c700="#8B0E1E", c800="#6e0b18", c900="#5c0d17", c950="#33040a", ), secondary_hue=gr.themes.Color( c50="#fef2f3", c100="#fde6e8", c200="#fbd0d5", c300="#f7a4ae", c400="#f17182", c500="#C3142D", c600="#b01228", c700="#8B0E1E", c800="#6e0b18", c900="#5c0d17", c950="#33040a", ), neutral_hue=gr.themes.Color( c50="#EDECE2", c100="#E5E4D9", c200="#DDDCD0", c300="#C8C7BC", c400="#A3A299", c500="#858479", c600="#6B6A61", c700="#53524B", c800="#3B3A35", c900="#252420", c950="#151410", ), font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"], ) _CSS = """ .gradio-container { max-width: 1280px !important; margin: auto; } footer { display: none !important; } .gr-button-primary { background: #C3142D !important; border: none !important; } .gr-button-primary:hover { background: #8B0E1E !important; } .gr-button-secondary { border-color: #C3142D !important; color: #C3142D !important; } .gr-button-secondary:hover { background: #8B0E1E !important; color: white !important; } .gr-input:focus { border-color: #C3142D !important; box-shadow: 0 0 0 2px rgba(195,20,45,0.2) !important; } """ def build_app() -> gr.Blocks: with gr.Blocks(title="Decomposition Explorer v1.0") as app: gr.HTML("""
Miami University

Decomposition Explorer v1.0

ISA 444: Business Forecasting · Farmer School of Business · Miami University

""") gr.HTML("""
Interactive tool for exploring time-series decomposition methods (Classical and STL). Choose a built-in dataset or upload your own CSV, adjust decomposition parameters, and examine trend, seasonal, and remainder components along with strength measures.
""") with gr.Row(): # --- Left column: controls ------------------------------------ with gr.Column(scale=1, min_width=280): dataset_dd = gr.Dropdown( label="Dataset", choices=list(BUILTIN_DATASETS.keys()), value="Airline Passengers", ) csv_upload = gr.File( label="Or upload CSV (columns: ds, y)", file_types=[".csv"], type="filepath", ) method_radio = gr.Radio( label="Decomposition Method", choices=[ "Classical (Additive)", "Classical (Multiplicative)", "STL", ], value="STL", ) period_slider = gr.Slider( label="Period / Season Length", minimum=2, maximum=52, step=1, value=12, ) # STL-specific controls stl_group = gr.Group(visible=True) with stl_group: gr.Markdown("**STL Parameters**") stl_seasonal_slider = gr.Slider( label="seasonal (seasonality window, odd)", minimum=7, maximum=51, step=2, value=13, ) stl_trend_slider = gr.Slider( label="trend (trend window, odd; 0 = auto)", minimum=0, maximum=101, step=2, value=0, ) stl_robust_cb = gr.Checkbox( label="robust (robust to outliers)", value=False, ) # --- Right column: output ------------------------------------- with gr.Column(scale=3): plot_output = gr.Plot(label="Decomposition") summary_box = gr.Textbox( label="Strength Measures", lines=5, interactive=False, ) # --- Visibility toggle for STL controls --------------------------- def toggle_stl(method): return gr.Group(visible=(method == "STL")) method_radio.change( fn=toggle_stl, inputs=[method_radio], outputs=[stl_group], ) # --- Gather all inputs -------------------------------------------- all_inputs = [ dataset_dd, csv_upload, method_radio, period_slider, stl_seasonal_slider, stl_trend_slider, stl_robust_cb, ] all_outputs = [plot_output, summary_box] # --- Wire change events ------------------------------------------- for ctrl in all_inputs: ctrl.change( fn=decompose_and_plot, inputs=all_inputs, outputs=all_outputs, ) # --- Initial load ------------------------------------------------- app.load( fn=decompose_and_plot, inputs=all_inputs, outputs=all_outputs, ) gr.HTML("""
Developed by Fadel M. Megahed · Gloss Professor of Analytics · Miami University
Version 1.0.0 · Spring 2026 · GitHub · LinkedIn
""") return app # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- if __name__ == "__main__": demo = build_app() demo.launch(theme=_THEME, css=_CSS, ssr_mode=False, allowed_paths=["beveled-m-min-size.png"])