""" Decomposition Explorer Interactive tool for exploring time-series decomposition methods. Part of ISA 444: Business Forecasting at Miami University (Spring 2026). Deployed to HuggingFace Spaces as fmegahed/decomposition-explorer. """ import io import warnings import gradio as gr import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np import pandas as pd from statsmodels.tsa.seasonal import STL, seasonal_decompose # --------------------------------------------------------------------------- # Color palette # --------------------------------------------------------------------------- CLR_PRIMARY = "#84d6d3" # teal CLR_ACCENT = "#C3142D" # Miami red CLR_TREND = "#C3142D" CLR_SEASON = "#84d6d3" CLR_RESID = "#666666" # --------------------------------------------------------------------------- # Built-in datasets # --------------------------------------------------------------------------- def _airline_passengers() -> pd.DataFrame: """Classic Box-Jenkins airline passengers (1949-1960, monthly).""" try: from statsmodels.datasets import co2 # noqa: F401 import statsmodels.api as sm data = sm.datasets.get_rdataset("AirPassengers", "datasets").data dates = pd.date_range(start="1949-01-01", periods=len(data), freq="MS") # Prefer common value column names from Rdatasets candidate_cols = [c for c in ["value", "passengers", "x"] if c in data.columns] if candidate_cols: y = pd.to_numeric(data[candidate_cols[0]], errors="coerce").to_numpy() else: # Fallback: take the last numeric column (and avoid obvious time columns) numeric_cols = data.select_dtypes(include=["number"]).columns.tolist() drop_cols = [c for c in ["time", "date", "year", "month"] if c in numeric_cols] numeric_cols = [c for c in numeric_cols if c not in drop_cols] if not numeric_cols: raise ValueError(f"Could not identify value column in AirPassengers data: {list(data.columns)}") y = pd.to_numeric(data[numeric_cols[-1]], errors="coerce").to_numpy() return pd.DataFrame({"ds": dates, "y": y}) except Exception: # Fallback: generate the well-known series manually np.random.seed(0) dates = pd.date_range("1949-01-01", "1960-12-01", freq="MS") n = len(dates) t = np.arange(n) trend = 110 + 2.5 * t seasonal_pattern = np.array( [-24, -20, 2, -1, -5, 30, 47, 46, 14, -10, -25, -26] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 6, n) y = trend + season * (1 + 0.02 * t) + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) def _us_retail_employment() -> pd.DataFrame: """Realistic synthetic monthly US retail employment (2000-2024).""" np.random.seed(42) dates = pd.date_range("2000-01-01", "2024-12-01", freq="MS") n = len(dates) t = np.arange(n) # Trend: upward with dips around 2008-09 and 2020 trend = 15_000 + 12 * t # 2008-2009 recession dip recession_08 = -1400 * np.exp(-0.5 * ((t - 108) / 8) ** 2) # 2020 COVID dip covid_20 = -2800 * np.exp(-0.5 * ((t - 243) / 3) ** 2) trend = trend + recession_08 + covid_20 # Seasonal pattern (retail peaks in Nov-Dec) seasonal_pattern = np.array( [-200, -350, -100, 50, 100, 150, 100, 80, -50, -100, 250, 500] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 60, n) y = trend + season + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) def _ohio_nonfarm() -> pd.DataFrame: """Realistic synthetic monthly Ohio nonfarm employment (2010-2024).""" np.random.seed(7) dates = pd.date_range("2010-01-01", "2024-12-01", freq="MS") n = len(dates) t = np.arange(n) trend = 5_100 + 4.5 * t # COVID dip covid = -650 * np.exp(-0.5 * ((t - 123) / 3) ** 2) trend = trend + covid seasonal_pattern = np.array( [-80, -50, 30, 50, 70, 60, 20, 10, 30, 20, -30, -60] ) season = np.tile(seasonal_pattern, n // 12 + 1)[:n] noise = np.random.normal(0, 25, n) y = trend + season + noise return pd.DataFrame({"ds": dates, "y": np.round(y, 1)}) BUILTIN_DATASETS = { "Airline Passengers": _airline_passengers, "US Retail Employment": _us_retail_employment, "Ohio Nonfarm Employment": _ohio_nonfarm, } # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _load_dataset(name: str, csv_file) -> pd.DataFrame: """Return a DataFrame with columns ds (datetime) and y (float).""" if csv_file is not None: try: raw = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file) if "ds" not in raw.columns or "y" not in raw.columns: raise ValueError("CSV must contain columns 'ds' and 'y'.") raw["ds"] = pd.to_datetime(raw["ds"]) raw["y"] = pd.to_numeric(raw["y"], errors="coerce") raw = raw.dropna(subset=["y"]).sort_values("ds").reset_index(drop=True) return raw except Exception as exc: raise gr.Error(f"Could not read uploaded CSV: {exc}") if name in BUILTIN_DATASETS: return BUILTIN_DATASETS[name]() raise gr.Error(f"Unknown dataset: {name}") def _ensure_odd(val: int) -> int: """Force a value to be odd (required by statsmodels windows).""" val = int(val) return val if val % 2 == 1 else val + 1 def _strength(residual: np.ndarray, component_plus_residual: np.ndarray) -> float: """Compute strength of a component: max(0, 1 - Var(R)/Var(C+R)).""" var_r = np.nanvar(residual) var_cr = np.nanvar(component_plus_residual) if var_cr == 0: return 0.0 return float(max(0.0, 1.0 - var_r / var_cr)) # --------------------------------------------------------------------------- # Core decomposition + plotting # --------------------------------------------------------------------------- def decompose_and_plot( dataset_name: str, csv_file, method: str, period: int, stl_seasonal: int, stl_trend: int, stl_robust: bool, ): """Run decomposition and return (matplotlib Figure, summary string).""" # --- Load data -------------------------------------------------------- df = _load_dataset(dataset_name, csv_file) if len(df) < 2 * period: raise gr.Error( f"Not enough observations ({len(df)}) for the chosen period ({period}). " f"Need at least {2 * period} observations." ) y_series = pd.Series(df["y"].values, index=df["ds"]) # --- Decompose -------------------------------------------------------- with warnings.catch_warnings(): warnings.simplefilter("ignore") if method == "STL": stl_seasonal = _ensure_odd(stl_seasonal) stl_trend_val = _ensure_odd(stl_trend) if stl_trend > 0 else None stl_obj = STL( y_series, period=int(period), seasonal=stl_seasonal, trend=stl_trend_val, robust=bool(stl_robust), ) result = stl_obj.fit() else: model_type = "additive" if "Additive" in method else "multiplicative" result = seasonal_decompose( y_series, model=model_type, period=int(period) ) observed = result.observed trend = result.trend seasonal = result.seasonal resid = result.resid # --- Strength measures ------------------------------------------------ r = resid.values t = trend.values s = seasonal.values mask = ~(np.isnan(r) | np.isnan(t) | np.isnan(s)) r_clean = r[mask] t_clean = t[mask] s_clean = s[mask] f_trend = _strength(r_clean, t_clean + r_clean) f_season = _strength(r_clean, s_clean + r_clean) # --- Plot ------------------------------------------------------------- fig, axes = plt.subplots(4, 1, figsize=(10, 8), sharex=True) fig.patch.set_facecolor("white") for ax in axes: ax.set_facecolor("white") ax.grid(True, linewidth=0.3, alpha=0.5) dates = observed.index # 1. Observed axes[0].plot(dates, observed, color=CLR_PRIMARY, linewidth=1.2) axes[0].set_ylabel("Observed", fontsize=10, fontweight="bold") # 2. Trend axes[1].plot(dates, trend, color=CLR_TREND, linewidth=1.4) axes[1].set_ylabel("Trend", fontsize=10, fontweight="bold") # 3. Seasonal axes[2].plot(dates, seasonal, color=CLR_SEASON, linewidth=1.0) axes[2].set_ylabel("Seasonal", fontsize=10, fontweight="bold") # 4. Residual axes[3].plot(dates, resid, color=CLR_RESID, linewidth=0.8, alpha=0.8) axes[3].set_ylabel("Remainder", fontsize=10, fontweight="bold") axes[3].set_xlabel("Date", fontsize=10) method_label = method if method == "STL" else method.replace("Classical ", "Classical – ") fig.suptitle( f"Decomposition · {method_label} · period = {period}", fontsize=13, fontweight="bold", y=0.98, ) fig.tight_layout(rect=[0, 0, 1, 0.96]) # --- Summary text ----------------------------------------------------- summary = ( f"Strength of Trend (F_T): {f_trend:.4f}\n" f"Strength of Seasonality (F_S): {f_season:.4f}\n\n" f"Formulas:\n" f" F_T = max(0, 1 − Var(R) / Var(T + R))\n" f" F_S = max(0, 1 − Var(R) / Var(S + R))" ) return fig, summary # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- _THEME = gr.themes.Soft( primary_hue=gr.themes.Color( c50="#fef2f3", c100="#fde6e8", c200="#fbd0d5", c300="#f7a4ae", c400="#f17182", c500="#C3142D", c600="#b01228", c700="#8B0E1E", c800="#6e0b18", c900="#5c0d17", c950="#33040a", ), secondary_hue=gr.themes.Color( c50="#fef2f3", c100="#fde6e8", c200="#fbd0d5", c300="#f7a4ae", c400="#f17182", c500="#C3142D", c600="#b01228", c700="#8B0E1E", c800="#6e0b18", c900="#5c0d17", c950="#33040a", ), neutral_hue=gr.themes.Color( c50="#EDECE2", c100="#E5E4D9", c200="#DDDCD0", c300="#C8C7BC", c400="#A3A299", c500="#858479", c600="#6B6A61", c700="#53524B", c800="#3B3A35", c900="#252420", c950="#151410", ), font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"], ) _CSS = """ .gradio-container { max-width: 1280px !important; margin: auto; } footer { display: none !important; } .gr-button-primary { background: #C3142D !important; border: none !important; } .gr-button-primary:hover { background: #8B0E1E !important; } .gr-button-secondary { border-color: #C3142D !important; color: #C3142D !important; } .gr-button-secondary:hover { background: #8B0E1E !important; color: white !important; } .gr-input:focus { border-color: #C3142D !important; box-shadow: 0 0 0 2px rgba(195,20,45,0.2) !important; } """ def build_app() -> gr.Blocks: with gr.Blocks(title="Decomposition Explorer v1.0") as app: gr.HTML("""
ISA 444: Business Forecasting · Farmer School of Business · Miami University