Spaces:

QuantumLearner
/

Space75

Sleeping

App Files Files Community

QuantumLearner committed on Aug 18, 2025

Commit

e0419d5

verified ·

1 Parent(s): 23d5287

Update app.py

Browse files

Files changed (1) hide show

app.py +444 -345

app.py CHANGED Viewed

@@ -1,4 +1,9 @@
-# app.py — Volatility Mean-Reversion
 import io
 from datetime import datetime, timedelta
@@ -6,28 +11,26 @@ import numpy as np
 import pandas as pd
 import streamlit as st
 import yfinance as yf
 import statsmodels.api as sm
 from statsmodels.tsa.stattools import adfuller
 from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-# ============================== Page config ===============================
 st.set_page_config(page_title="Volatility Mean-Reversion", layout="wide")
 st.title("Volatility Mean-Reversion")
-st.markdown(
-    "Compare **implied** volatility (VIX) with **realized** volatility of the S&P 500, "
-    "test for stationarity, estimate mean-reversion speeds (AR(1) and OU), and detect "
-    "high/low-volatility regimes with a 2-state Markov model."
 )
-# ================================ Sidebar ================================
 with st.sidebar:
     st.header("Controls")
-    # Data window
     with st.expander("Data Window", expanded=False):
         default_start = datetime(2015, 1, 1).date()
         default_end = (datetime.today().date() + timedelta(days=1))
@@ -43,414 +46,510 @@ with st.sidebar:
             value=default_end,
             min_value=default_start,
             max_value=default_end,
-            help="Default is today + 1 to include latest close."
         )
-    # Realized vol settings
-    with st.expander("Realized Volatility", expanded=False):
         rv_window = st.number_input(
             "Realized-vol window (days)",
-            value=21, min_value=5, max_value=63, step=1,
-            help="Rolling window for realized volatility (annualized)."
         )
         scale_mode = st.selectbox(
-            "Scaling of realized vol vs VIX",
-            options=["Auto (match means)", "Manual factor"],
-            index=0,
-            help="Aligns realized vol to VIX scale for the gap chart."
         )
-        scale_factor = 1.0
-        if scale_mode == "Manual factor":
-            scale_factor = st.number_input(
-                "Manual scaling factor",
-                value=1.0, min_value=0.1, max_value=10.0, step=0.1,
-                help="Multiply realized vol by this factor before comparing to VIX."
-            )
-    # AR(1) & OU
-    with st.expander("Mean-Reversion", expanded=False):
         ou_roll_window = st.number_input(
             "OU rolling window (days)",
-            value=252, min_value=63, max_value=756, step=21,
-            help="Lookback for rolling OU half-life estimates."
         )
-    # Markov switching
-    with st.expander("Regime Model", expanded=False):
-        ms_states = st.number_input(
-            "Regimes (fixed at 2)",
-            value=2, min_value=2, max_value=2, step=0,
-            help="Two regimes: low volatility and high volatility."
         )
     run_btn = st.button("Run Analysis", type="primary")
-# ================================ Caching ================================
 @st.cache_data(show_spinner=False)
 def fetch_yf_close(tickers: list[str], start: str, end: str) -> pd.DataFrame:
     """
-    Cached Yahoo Finance close prices for the requested tickers/date range.
     """
-    data = yf.download(
-        tickers, start=start, end=end, progress=False, auto_adjust=False
-    )
-    # Flatten potential MultiIndex columns (['Close']['^VIX'])
     if isinstance(data.columns, pd.MultiIndex):
-        data.columns = [" ".join([str(c) for c in tup]).strip() for tup in data.columns]
-    # Keep closes only
-    cols = [c for c in data.columns if "Close" in c]
-    out = data[cols].copy()
-    # rename standard tickers
-    rename = {}
-    for c in out.columns:
-        if "^VIX" in c: rename[c] = "VIX"
-        if "^GSPC" in c: rename[c] = "SPX"
-    out = out.rename(columns=rename)
     return out.sort_index().ffill()
-# =============== Small helpers ===============
-def _date_str(d): return pd.to_datetime(d).strftime("%Y-%m-%d")
-def _tickformatstops():
-    # dynamic, granular date ticks on zoom
-    day   = 24*3600*1000
-    week  = 7*day
     return [
-        dict(dtickrange=[None, day],   value="%b %d\n%Y"),
-        dict(dtickrange=[day, week],   value="%b %d"),
-        dict(dtickrange=[week, "M1"],  value="%b %d\n%Y"),
-        dict(dtickrange=["M1", "M6"],  value="%b %Y"),
-        dict(dtickrange=["M6", None],  value="%Y"),
     ]
-def _white_axes(fig):
-    fig.update_layout(template="plotly_dark", font=dict(color="white"))
-    if hasattr(fig.layout, "annotations"):
-        for a in fig.layout.annotations:
-            a.font = dict(color="white", size=12)
-    fig.update_xaxes(
-        ticklabelmode="period",
-        tickformatstops=_tickformatstops(),
-        tickangle=0,
-        tickfont=dict(color="white"),
-        title_font=dict(color="white"),
-        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
-        showline=True, linecolor="rgba(255,255,255,0.4)"
-    )
-    fig.update_yaxes(
-        tickfont=dict(color="white"),
-        title_font=dict(color="white"),
-        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
-        showline=True, linecolor="rgba(255,255,255,0.4)"
-    )
-    return fig
-# =============================== Run =====================================
-if run_btn:
-    # ---------- Data ----------
-    start_str, end_str = _date_str(start_date), _date_str(end_date)
-    with st.spinner("Fetching VIX & SPX…"):
-        px = fetch_yf_close(["^VIX", "^GSPC"], start_str, end_str)
-    if px.empty or "VIX" not in px or "SPX" not in px:
-        st.error("Couldn’t fetch VIX/SPX. Adjust dates and try again.")
         st.stop()
-    vix = px["VIX"].copy()
-    spx = px["SPX"].copy()
-    # Realized vol from SPX log returns
     log_ret = np.log(spx).diff()
     rv = log_ret.rolling(int(rv_window)).std() * np.sqrt(252)
     rv = rv.dropna()
-    # Align VIX to realized-vol index
     vix = vix.reindex(rv.index).ffill()
-    # Scaling
     if scale_mode.startswith("Auto"):
-        sf = float(vix.mean() / rv.mean()) if rv.mean() != 0 else 1.0
     else:
         sf = float(scale_factor)
     rv_scaled = rv * sf
     diff = vix - rv_scaled
-    # ===================== SECTION 1 — Gap: VIX vs Realized Vol =====================
-    st.header("1) Implied vs Realized Volatility")
     with st.expander("Methodology", expanded=False):
-        st.write("**Goal:** Compare implied volatility (VIX) to realized volatility of the S&P 500 and track their gap.")
-        st.write("**Realized volatility** (annualized) from log returns over window \(w\):")
-        st.latex(r"r_t=\ln\frac{P_t}{P_{t-1}},\qquad \sigma^{(w)}_t=\sqrt{252}\cdot \operatorname{stdev}\!\big(r_{t-w+1},\dots,r_t\big)")
-        st.write("**Scaling** aligns magnitudes for comparison:")
-        st.latex(r"\tilde{\sigma}_t = s \cdot \sigma^{(w)}_t,\quad s=\frac{\overline{\text{VIX}}}{\overline{\sigma^{(w)}}}\ \ \text{(auto, or manual factor)}")
-        st.write("**Gap (VIX – scaled realized)**:")
-        st.latex(r"\Delta_t=\text{VIX}_t-\tilde{\sigma}_t")
-        st.write(
-            "- **Δ > 0**: implied > realized → options rich / risk premium elevated.\n"
-            "- **Δ < 0**: realized ≥ implied → options cheap vs recent movement."
-        )
-    fig1 = make_subplots(rows=2, cols=1, shared_xaxes=True,
-                         subplot_titles=("VIX vs Scaled Realized Vol", "Gap: VIX – Scaled Realized Vol"))
-    fig1.add_trace(go.Scatter(x=vix.index, y=vix, name="VIX"), row=1, col=1)
-    fig1.add_trace(go.Scatter(x=rv_scaled.index, y=rv_scaled, name=f"Realized Vol × {sf:.2f}"), row=1, col=1)
-    fig1.add_trace(go.Scatter(x=diff.index, y=diff, name="Gap Δ"), row=2, col=1)
-    fig1.add_hline(y=0, line_dash="dash", line_color="rgba(180,180,180,0.8)", row=2, col=1)
-    fig1.update_yaxes(title_text="Vol level", row=1, col=1)
-    fig1.update_yaxes(title_text="Δ (points)", row=2, col=1)
-    fig1.update_xaxes(title_text="Date", row=2, col=1)
-    fig1.update_layout(height=600, legend=dict(orientation="h", y=1.05, x=0))
-    _white_axes(fig1)
-    st.plotly_chart(fig1, use_container_width=True)
-    # Short read
-    with st.expander("Quick Read (current)", expanded=False):
-        last_dt = rv.index[-1].date()
-        st.write(
-            f"As of **{last_dt}**: VIX = **{float(vix.iloc[-1]):.2f}**, "
-            f"scaled realized = **{float(rv_scaled.iloc[-1]):.2f}**, gap Δ = **{float(diff.iloc[-1]):+.2f}**."
-        )
-    # ===================== SECTION 2 — Stationarity (ADF) & Rolling Diagnostics =====================
-    st.header("2) Stationarity & Rolling Diagnostics (log-vol)")
-    with st.expander("Methodology", expanded=False):
-        st.write("Test whether log-vol processes are stationary (mean-reverting) and show rolling mean/std.")
-        st.write("**Log transform:**")
-        st.latex(r"x_t=\ln(\text{VIX}_t),\qquad y_t=\ln(\sigma^{(w)}_t)")
-        st.write("**ADF test** (null: unit root / non-stationary):")
-        st.latex(r"\Delta z_t=\alpha+\beta t+\gamma z_{t-1}+\sum_{i=1}^{p}\phi_i \Delta z_{t-i}+\varepsilon_t\quad\text{(test } \gamma=0\text{)}")
-        st.write("Rejecting the null suggests stationarity (mean-reversion).")
-        st.write("We also show **252-day rolling mean** and **std** of log-vol.")
-    log_vix = np.log(vix).dropna()
-    log_rv = np.log(rv).dropna()
-    # Align for plotting windows
-    w_roll = 252
-    # ADF
-    adf_v = adfuller(log_vix, autolag="AIC")
-    adf_r = adfuller(log_rv, autolag="AIC")
-    # Rolling diag figure
-    fig2 = make_subplots(rows=2, cols=1, shared_xaxes=True,
-                         subplot_titles=("log(VIX) with 252-day Rolling Mean/Std",
-                                         "log(Realized Vol) with 252-day Rolling Mean/Std"))
-    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix, name="log(VIX)"), row=1, col=1)
-    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(w_roll).mean(), name="Rolling Mean", line=dict(dash="dash")), row=1, col=1)
-    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(w_roll).std(), name="Rolling Std", line=dict(dash="dot")), row=1, col=1)
-    fig2.add_trace(go.Scatter(x=log_rv.index, y=log_rv, name="log(Realized Vol)"), row=2, col=1)
-    fig2.add_trace(go.Scatter(x=log_rv.index, y=log_rv.rolling(w_roll).mean(), name="Rolling Mean", line=dict(dash="dash")), row=2, col=1)
-    fig2.add_trace(go.Scatter(x=log_rv.index, y=log_rv.rolling(w_roll).std(), name="Rolling Std", line=dict(dash="dot")), row=2, col=1)
-    fig2.update_yaxes(title_text="log scale", row=1, col=1)
-    fig2.update_yaxes(title_text="log scale", row=2, col=1)
-    fig2.update_xaxes(title_text="Date", row=2, col=1)
-    fig2.update_layout(height=600, legend=dict(orientation="h", y=1.05, x=0))
-    _white_axes(fig2)
     st.plotly_chart(fig2, use_container_width=True)
-    # ADF results
-    def _fmt_adf(label, res):
-        stat, pval, usedlag, nobs, crit, icbest = res
-        interp = "Reject H₀ → stationary" if (stat < crit["5%"] and pval < 0.05) else "Fail to reject H₀"
-        return {
-            "Series": label,
-            "ADF stat": f"{stat:.3f}",
-            "p-value": f"{pval:.4f}",
-            "5% crit": f"{crit['5%']:.3f}",
-            "Interp": interp
-        }
-    st.table(pd.DataFrame([_fmt_adf("log(VIX)", adf_v), _fmt_adf("log(Realized Vol)", adf_r)]))
-    # ===================== SECTION 3 — AR(1) & Half-Life =====================
-    st.header("3) AR(1) Mean-Reversion & Shock Half-Life")
     with st.expander("Methodology", expanded=False):
-        st.write("Fit AR(1) to log-vol and compute the shock half-life.")
-        st.latex(r"z_t = c + \phi z_{t-1} + \varepsilon_t")
-        st.write("If \(0<\phi<1\), shocks decay geometrically. **Half-life**:")
-        st.latex(r"\text{HL} = -\frac{\ln 2}{\ln \phi}")
-        st.write("Shorter HL ⇒ faster mean-reversion.")
-    def estimate_ar1(series: pd.Series):
         y = series.dropna()
         y_lag = y.shift(1).dropna()
         y = y.loc[y_lag.index]
         X = sm.add_constant(y_lag)
         res = sm.OLS(y, X).fit()
-        c = float(res.params["const"])
-        phi = float(res.params[y_lag.name])
-        return c, phi, res
-    c_v, phi_v, res_v = estimate_ar1(log_vix)
-    c_r, phi_r, res_r = estimate_ar1(log_rv)
-    def half_life_from_phi(phi: float):
-        if phi <= 0 or phi >= 1:
-            return np.nan
-        return -np.log(2) / np.log(phi)
-    hl_v = half_life_from_phi(phi_v)
-    hl_r = half_life_from_phi(phi_r)
-    # Scatter + regression lines (side-by-side)
-    def _scatter_fit(series, c, phi, name, color):
-        y = series.dropna()
-        xlag = y.shift(1).dropna()
-        y = y.loc[xlag.index]
-        x_line = np.linspace(float(xlag.min()), float(xlag.max()), 100)
-        return xlag, y, x_line, c + phi * x_line, name, color
-    x1, y1, xl1, yl1, n1, col1 = _scatter_fit(log_vix, c_v, phi_v, "log(VIX)", "#00d2ff")
-    x2, y2, xl2, yl2, n2, col2 = _scatter_fit(log_rv,  c_r, phi_r,  "log(Realized Vol)", "#ff8ef8")
-    fig3 = make_subplots(rows=1, cols=2, subplot_titles=(
-        f"AR(1) on log(VIX): φ={phi_v:.3f}, HL={hl_v:.1f}d" if np.isfinite(hl_v) else f"AR(1) on log(VIX): φ={phi_v:.3f}",
-        f"AR(1) on log(Realized Vol): φ={phi_r:.3f}, HL={hl_r:.1f}d" if np.isfinite(hl_r) else f"AR(1) on log(Realized Vol): φ={phi_r:.3f}"
-    ))
-    fig3.add_trace(go.Scatter(x=x1, y=y1, mode="markers", marker=dict(size=3, opacity=0.5, color=col1), name=n1), row=1, col=1)
-    fig3.add_trace(go.Scatter(x=xl1, y=yl1, mode="lines", line=dict(width=2, color=col1), name="fit"), row=1, col=1)
-    fig3.add_trace(go.Scatter(x=x2, y=y2, mode="markers", marker=dict(size=3, opacity=0.5, color=col2), name=n2), row=1, col=2)
-    fig3.add_trace(go.Scatter(x=xl2, y=yl2, mode="lines", line=dict(width=2, color=col2), name="fit"), row=1, col=2)
-    fig3.update_xaxes(title_text="lagged log-vol", row=1, col=1)
-    fig3.update_yaxes(title_text="log-vol",       row=1, col=1)
-    fig3.update_xaxes(title_text="lagged log-vol", row=1, col=2)
-    fig3.update_yaxes(title_text="log-vol",       row=1, col=2)
-    fig3.update_layout(height=450, legend=dict(orientation="h", y=1.05, x=0))
-    _white_axes(fig3)
-    st.plotly_chart(fig3, use_container_width=True)
-    st.caption(
-        f"AR(1) φ: VIX = **{phi_v:.3f}**, Realized = **{phi_r:.3f}**. "
-        f"Half-life (days): VIX = **{hl_v:.1f}**, Realized = **{hl_r:.1f}** "
-        "(if φ∈(0,1))."
     )
-    # ===================== SECTION 4 — OU Parameters & Rolling Half-Life =====================
-    st.header("4) Ornstein–Uhlenbeck (OU) & Rolling Half-Life")
     with st.expander("Methodology", expanded=False):
-        st.write("Estimate discrete-time OU parameters via Δx regression and track rolling half-life.")
-        st.latex(r"\Delta x_t = a + b\,x_{t-1} + \varepsilon_t \quad\Rightarrow\quad \kappa=-b,\ \ \mu=\frac{a}{\kappa},\ \ \text{HL}=\frac{\ln 2}{\kappa}")
-        st.write("Interpretation: **κ>0** → mean-reverting toward **μ**. Larger κ → faster reversion (shorter HL).")
-    def estimate_ou_params(x: pd.Series):
         x = x.dropna()
         dx = x.diff().dropna()
         x_lag = x.shift(1).loc[dx.index]
         X = sm.add_constant(x_lag)
         res = sm.OLS(dx, X).fit()
-        a = float(res.params["const"])
         b = float(res.params[x_lag.name])
         kappa = -b
-        mu = a / kappa if kappa != 0 else np.nan
-        hl = np.log(2) / kappa if kappa > 0 else np.nan
         sigma = float(res.resid.std())
         return kappa, mu, sigma, hl
-    k_v, mu_v, sig_v, hl_v_ou = estimate_ou_params(log_vix)
-    k_r, mu_r, sig_r, hl_r_ou = estimate_ou_params(log_rv)
-    st.caption(
-        f"OU κ (speed): VIX = **{k_v:.4f}**, Realized = **{k_r:.4f}** | "
-        f"HL (days): VIX = **{hl_v_ou:.1f}**, Realized = **{hl_r_ou:.1f}**"
-    )
     # Rolling half-life series
-    roll = int(ou_roll_window)
-    def rolling_ou_hl(x: pd.Series, w: int):
-        out_idx, out_vals = [], []
-        for i in range(w, len(x)):
-            seg = x.iloc[i-w:i]
-            k, _, _, hl = estimate_ou_params(seg)
-            out_idx.append(x.index[i])
-            out_vals.append(hl)
-        return pd.Series(out_vals, index=pd.Index(out_idx, name="Date"))
-    hl_v_ts = rolling_ou_hl(log_vix, roll)
-    hl_r_ts = rolling_ou_hl(log_rv,  roll)
-    med_v = float(hl_v_ts.median(skipna=True)) if len(hl_v_ts) else np.nan
-    med_r = float(hl_r_ts.median(skipna=True)) if len(hl_r_ts) else np.nan
-    fig4 = make_subplots(rows=1, cols=1, subplot_titles=(f"Rolling OU Half-Life (window={roll}d)",))
-    fig4.add_trace(go.Scatter(x=hl_v_ts.index, y=hl_v_ts, name="HL log(VIX)", line=dict(width=1)))
-    fig4.add_trace(go.Scatter(x=hl_r_ts.index, y=hl_r_ts, name="HL log(Realized Vol)", line=dict(width=1)))
-    if np.isfinite(med_v): fig4.add_hline(y=med_v, line_dash="dash", line_color="#00d2ff", annotation_text=f"Median VIX HL {med_v:.1f}d")
-    if np.isfinite(med_r): fig4.add_hline(y=med_r, line_dash="dash", line_color="#ff8ef8", annotation_text=f"Median RV HL {med_r:.1f}d")
     fig4.update_yaxes(title_text="Half-life (days)")
-    fig4.update_xaxes(title_text="Date")
-    fig4.update_layout(height=450, legend=dict(orientation="h", y=1.05, x=0))
-    _white_axes(fig4)
     st.plotly_chart(fig4, use_container_width=True)
-    # ===================== SECTION 5 — Markov Regimes on log(Realized Vol) =====================
-    st.header("5) High/Low-Volatility Regimes (Markov Switching)")
-    with st.expander("Methodology", expanded=False):
-        st.write("Two-state Markov model on log realized volatility with switching variance.")
-        st.latex(r"y_t \sim \mathcal{N}(\mu_{s_t},\,\sigma^2_{s_t}),\quad s_t\in\{0,1\}")
-        st.latex(r"P=\begin{bmatrix}p_{00} & p_{01}\\ p_{10} & p_{11}\end{bmatrix},\quad \text{Exp spell length of state }j=\frac{1}{1-p_{jj}}")
-        st.write(
-            "The high-vol regime is identified as the state with the higher mean \( \mu \). "
-            "We plot smoothed probabilities and shade periods with \( P(\text{high-vol})>0.5 \)."
-        )
-    y_ms = np.log(rv).dropna()
-    try:
-        ms = MarkovRegression(y_ms, k_regimes=2, trend="c", switching_variance=True)
-        res = ms.fit(disp=False)
-        p = res.smoothed_marginal_probabilities  # columns [0,1]
-        # Transition matrix
-        P = res.model.regime_transition_matrix(res.params).squeeze()
-        # Identify high-vol
-        mean0 = (y_ms * p[0]).sum() / p[0].sum()
-        mean1 = (y_ms * p[1]).sum() / p[1].sum()
-        high = 1 if mean1 > mean0 else 0
-        p_high = p[high]
-        # Expected spell lengths
-        p00, p11 = float(P[0, 0]), float(P[1, 1])
-        exp_len_0 = 1.0 / (1.0 - p00) if p00 < 1 else np.inf
-        exp_len_1 = 1.0 / (1.0 - p11) if p11 < 1 else np.inf
-        # Plot series + shading and probability
-        fig5 = make_subplots(rows=2, cols=1, shared_xaxes=True,
-                             subplot_titles=("log(Realized Vol) with High-Vol Shading",
-                                             f"Smoothed Probability of High-Vol Regime (State {high})"))
-        fig5.add_trace(go.Scatter(x=y_ms.index, y=y_ms, name="log(Realized Vol)"), row=1, col=1)
-        # Shading where p_high > 0.5
-        mask = p_high > 0.5
-        # Highlight spans by drawing rectangles across contiguous True segments
-        grp = (mask != mask.shift()).cumsum()
-        for _, span in mask[mask].groupby(grp):
-            x0 = span.index[0]; x1 = span.index[-1]
-            fig5.add_vrect(x0=x0, x1=x1, fillcolor="red", opacity=0.2, line_width=0, row=1, col=1)
-        fig5.add_trace(go.Scatter(x=p_high.index, y=p_high, name="P(High-Vol)"), row=2, col=1)
-        fig5.add_hline(y=0.5, line_dash="dash", line_color="rgba(180,180,180,0.8)", row=2, col=1)
-        fig5.update_yaxes(title_text="log-vol", row=1, col=1)
-        fig5.update_yaxes(title_text="Probability", row=2, col=1, range=[0, 1])
-        fig5.update_xaxes(title_text="Date", row=2, col=1)
-        fig5.update_layout(height=600, legend=dict(orientation="h", y=1.05, x=0))
-        _white_axes(fig5)
-        st.plotly_chart(fig5, use_container_width=True)
-        # Transition matrix & durations
-        st.subheader("Regime Persistence")
-        tbl = pd.DataFrame(
-            [[P[0,0], P[0,1]], [P[1,0], P[1,1]]],
-            index=["to Reg-0", "to Reg-1"], columns=["from Reg-0", "from Reg-1"]
-        ).round(4)
-        st.table(tbl)
-        st.caption(
-            f"Expected spell lengths — Reg-0: **{exp_len_0:.1f}** days, Reg-1: **{exp_len_1:.1f}** days. "
-            f"High-vol identified as **Reg-{high}**."
-        )
-    except Exception as e:
-        st.warning(f"Markov model failed to converge: {e}")
-    # ============================ Footer note =============================
-    st.markdown(
-        "<sub>Note: analytical settings (windows, scaling, etc.) recompute from the "
-        "cached price data. Changing dates triggers a new data fetch; changing parameters "
-        "does not.</sub>",
-        unsafe_allow_html=True
-    )

+# app.py — Volatility Mean-Reversion (VIX vs Realized Vol)
+# -----------------------------------------------------------------------------
+# Requirements:
+#   pip install streamlit yfinance statsmodels plotly numpy pandas
+# -----------------------------------------------------------------------------
 import io
 from datetime import datetime, timedelta
 import pandas as pd
 import streamlit as st
 import yfinance as yf
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 import statsmodels.api as sm
 from statsmodels.tsa.stattools import adfuller
 from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
+# ----------------------------- Page config & header -----------------------------
 st.set_page_config(page_title="Volatility Mean-Reversion", layout="wide")
 st.title("Volatility Mean-Reversion")
+st.write(
+    "Compare implied volatility (VIX) with realized SPX volatility, test stationarity, "
+    "estimate mean-reversion speed and half-lives (AR(1) & OU), and detect high/low "
+    "volatility regimes via a two-state Markov model."
 )
+# ----------------------------- Sidebar controls -----------------------------
 with st.sidebar:
     st.header("Controls")
     with st.expander("Data Window", expanded=False):
         default_start = datetime(2015, 1, 1).date()
         default_end = (datetime.today().date() + timedelta(days=1))
             value=default_end,
             min_value=default_start,
             max_value=default_end,
+            help="Set to today+1 (default) to include the latest close."
         )
         rv_window = st.number_input(
             "Realized-vol window (days)",
+            value=21, min_value=5, max_value=126, step=1,
+            help="Rolling window for realized volatility (log returns)."
         )
+    with st.expander("Scaling (VIX vs RV)", expanded=False):
         scale_mode = st.selectbox(
+            "Scaling method",
+            options=["Auto (match means)", "Manual"],
+            help="Auto scales realized vol to VIX by matching means; Manual uses your factor."
+        )
+        scale_factor = st.number_input(
+            "Manual scale factor",
+            value=1.0, step=0.1, format="%.3f",
+            help="Only used when 'Manual' is selected.",
+            disabled=(scale_mode != "Manual")
         )
+    with st.expander("Rolling & ADF", expanded=False):
+        roll_win = st.number_input(
+            "Rolling (days) for mean/std displays",
+            value=252, min_value=60, max_value=756, step=10,
+            help="Used to plot rolling mean and standard deviation of log series."
+        )
+        adf_alpha = st.selectbox(
+            "ADF significance level",
+            options=[0.10, 0.05, 0.01],
+            index=1,
+            help="p-value threshold for rejecting unit root (stationarity)."
+        )
+    with st.expander("OU & Half-Life", expanded=False):
         ou_roll_window = st.number_input(
             "OU rolling window (days)",
+            value=252, min_value=126, max_value=756, step=10,
+            help="Window for rolling OU half-life estimates."
         )
+    with st.expander("Markov Regime Model", expanded=False):
+        run_ms = st.checkbox(
+            "Run two-state Markov switching on log(Realized Vol)",
+            value=True,
+            help="Fits a 2-regime model with switching variance and shows shading."
         )
     run_btn = st.button("Run Analysis", type="primary")
+# ----------------------------- Data fetch (cached) -----------------------------
 @st.cache_data(show_spinner=False)
 def fetch_yf_close(tickers: list[str], start: str, end: str) -> pd.DataFrame:
     """
     Download unadjusted 'Close' prices for *tickers* between *start* and *end*.

     The download is requested with auto_adjust=False and only the 'Close'
     field is kept. '^VIX' / '^GSPC' columns are renamed to 'VIX' / 'SPX', and
     the frame is narrowed to whichever of those two columns is present.
     The result is sorted by index and forward-filled. Results are cached by
     Streamlit per (tickers, start, end).
     """
     raw = yf.download(tickers, start=start, end=end, progress=False, auto_adjust=False)

     if not isinstance(raw.columns, pd.MultiIndex):
         # Flat columns: keep 'Close' as a one-column frame and label it with
         # the first requested ticker so the rename below can recognise it.
         closes = raw[['Close']].copy()
         label = tickers[0] if tickers else 'Close'
         closes = closes.rename(columns={'Close': label})
     else:
         # MultiIndex columns: take the whole 'Close' group.
         closes = raw['Close'].copy()

     closes = closes.rename(columns={'^VIX': 'VIX', '^GSPC': 'SPX'})

     # A column is wanted when its ticker was requested or it already exists.
     wanted = [
         short
         for symbol, short in (('^VIX', 'VIX'), ('^GSPC', 'SPX'))
         if symbol in tickers or short in closes.columns
     ]
     if wanted:
         present = [short for short in wanted if short in closes.columns]
         closes = closes[present]

     return closes.sort_index().ffill()
+def _tickformatstops_monthy():
+    # Month-aware tick formats that refine as you zoom
     return [
+        dict(dtickrange=[None, "M1"],  value="%b %Y"),  # < 1M step
+        dict(dtickrange=["M1", "M12"], value="%b %Y"),  # 1M..12M
+        dict(dtickrange=["M12", None], value="%Y")      # >= yearly
     ]
+# ----------------------------- Run pipeline -----------------------------
+if run_btn:
+    start_str = pd.to_datetime(start_date).strftime("%Y-%m-%d")
+    end_str   = pd.to_datetime(end_date).strftime("%Y-%m-%d")
+    with st.spinner("Downloading VIX & SPX…"):
+        px = fetch_yf_close(['^VIX', '^GSPC'], start_str, end_str)
+    if px.empty or not set(['VIX', 'SPX']).issubset(px.columns):
+        st.error("Could not fetch both VIX and SPX 'Close' series. Try a different date range.")
         st.stop()
+    vix = px['VIX'].copy()
+    spx = px['SPX'].copy()
+    # ---------- Section 1: Implied vs Realized Volatility ----------
+    st.header("Implied vs Realized Volatility")
+    with st.expander("Methodology", expanded=False):
+        st.write("We compare **implied volatility (VIX)** to **realized SPX volatility** over a rolling window.")
+        st.write("Log returns and realized volatility:")
+        st.latex(r"r_t = \ln P_t - \ln P_{t-1}, \qquad \mathrm{RV}_{n}(t) = \sqrt{252}\ \mathrm{stdev}\big(r_{t-n+1},\ldots,r_t\big)")
+        st.write("Scaling (to compare levels):")
+        st.latex(r"s = \frac{\overline{\mathrm{VIX}}}{\overline{\mathrm{RV}_n}} \quad \Rightarrow \quad \mathrm{RV}^{\mathrm{scaled}}_n = s\cdot \mathrm{RV}_n")
+        st.write("Gap:")
+        st.latex(r"\Delta_t = \mathrm{VIX}_t - \mathrm{RV}^{\mathrm{scaled}}_{n}(t)")
+        st.write(
+            "Interpretation: VIX > scaled RV suggests an implied risk premium; VIX < scaled RV suggests realized "
+            "volatility is running ‘hot’ relative to implied."
+        )
+    # Realized volatility
     log_ret = np.log(spx).diff()
     rv = log_ret.rolling(int(rv_window)).std() * np.sqrt(252)
     rv = rv.dropna()
+    # Align VIX and compute scaling
     vix = vix.reindex(rv.index).ffill()
+    vix_mean = float(vix.mean()) if len(vix) else np.nan
+    rv_mean  = float(rv.mean())  if len(rv)  else np.nan
     if scale_mode.startswith("Auto"):
+        sf = (vix_mean / rv_mean) if (np.isfinite(vix_mean) and np.isfinite(rv_mean) and rv_mean != 0) else 1.0
     else:
         sf = float(scale_factor)
     rv_scaled = rv * sf
     diff = vix - rv_scaled
+    # Plot: VIX vs RV (row 1), Gap (row 2)
+    fig1 = make_subplots(
+        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05,
+        specs=[[{"secondary_y": True}], [{}]],
+        subplot_titles=("VIX vs Realized Volatility", "VIX − Scaled Realized Volatility")
+    )
+    # Row 1
+    fig1.add_trace(go.Scatter(x=vix.index, y=vix, name="VIX", line=dict(width=1, color="cyan")), row=1, col=1, secondary_y=False)
+    fig1.add_trace(go.Scatter(x=rv.index,  y=rv,  name=f"Realized Vol ({int(rv_window)}d)", line=dict(width=1, color="magenta")), row=1, col=1, secondary_y=True)
+    fig1.update_yaxes(title_text="VIX", row=1, col=1, secondary_y=False)
+    fig1.update_yaxes(title_text="Realized Vol", row=1, col=1, secondary_y=True)
+    # Row 2
+    fig1.add_trace(go.Scatter(x=diff.index, y=diff, name="VIX − Scaled RV", line=dict(width=1, color="white")), row=2, col=1)
+    fig1.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
+    fig1.update_yaxes(title_text="Difference", row=2, col=1)
+    # Style
+    fig1.update_xaxes(
+        tickformatstops=_tickformatstops_monthy(),
+        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+        showline=True, linecolor="rgba(255,255,255,0.4)"
+    )
+    fig1.update_yaxes(
+        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+        showline=True, linecolor="rgba(255,255,255,0.4)"
+    )
+    fig1.update_layout(
+        template="plotly_dark",
+        height=650,
+        margin=dict(l=60, r=20, t=60, b=40),
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
+        font=dict(color="white"),
+        hovermode="x unified"
+    )
+    # Ensure white subplot titles
+    if hasattr(fig1.layout, "annotations"):
+        for a in fig1.layout.annotations:
+            a.font = dict(color="white", size=12)
+    st.plotly_chart(fig1, use_container_width=True)
+    # ---------- Section 2: Stationarity (ADF) & Rolling Diagnostics ----------
+    st.header("Stationarity & Rolling Diagnostics")
     with st.expander("Methodology", expanded=False):
+        st.write("Test whether log-volatility is stationary (mean-reverting) using the ADF test.")
+        st.latex(r"\text{ADF null: unit root (non-stationary)}\quad\text{vs}\quad \text{stationary (mean-reverting)}")
+        st.write("Rolling mean and std provide a visual check of stability over time.")
+    # log series
+    log_vix      = np.log(vix)
+    log_real_vol = np.log(rv)
+    # ADF tests
+    adf_vix = adfuller(log_vix.dropna(), autolag='AIC')
+    adf_rv  = adfuller(log_real_vol.dropna(), autolag='AIC')
+    # Rolling plots (two rows)
+    fig2 = make_subplots(
+        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
+        subplot_titles=(f"log(VIX) with {int(roll_win)}d Rolling Mean & Std",
+                        f"log(Realized Vol) with {int(roll_win)}d Rolling Mean & Std")
+    )
+    # log(VIX)
+    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix, name="log(VIX)", line=dict(width=1, color="#00d2ff")), row=1, col=1)
+    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=1, col=1)
+    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=1, col=1)
+    # log(RV)
+    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol, name="log(Realized Vol)", line=dict(width=1, color="#ff6ad5")), row=2, col=1)
+    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=2, col=1)
+    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=2, col=1)
+    fig2.update_yaxes(title_text="Level", row=1, col=1)
+    fig2.update_yaxes(title_text="Level", row=2, col=1)
+    fig2.update_xaxes(
+        tickformatstops=_tickformatstops_monthy(),
+        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+        showline=True, linecolor="rgba(255,255,255,0.4)"
+    )
+    fig2.update_yaxes(
+        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+        showline=True, linecolor="rgba(255,255,255,0.4)"
+    )
+    fig2.update_layout(
+        template="plotly_dark",
+        height=650,
+        margin=dict(l=60, r=20, t=60, b=40),
+        font=dict(color="white"),
+        hovermode="x unified"
+    )
+    if hasattr(fig2.layout, "annotations"):
+        for a in fig2.layout.annotations:
+            a.font = dict(color="white", size=12)
     st.plotly_chart(fig2, use_container_width=True)
+    # ADF interpretation (match raw narrative style)
+    def _print_adf(name, adf_res, alpha):
+        buf = io.StringIO()
+        stat, pvalue, usedlag, nobs, crit_vals, icbest = adf_res
+        print(f"ADF Test on {name}:", file=buf)
+        print(f"  Statistic : {stat:.4f}", file=buf)
+        print(f"  p-value   : {pvalue:.4f}", file=buf)
+        print("  Critical Values:", file=buf)
+        for lvl, val in crit_vals.items():
+            print(f"    {lvl}: {val:.4f}", file=buf)
+        if (stat < crit_vals['5%']) and (pvalue < alpha):
+            print("  → Reject H₀: series is stationary (mean-reverting)\n", file=buf)
+        else:
+            print("  → Fail to reject H₀: series likely has a unit root (no clear mean-reversion)\n", file=buf)
+        return buf.getvalue()
+    with st.expander("ADF Results & Interpretation", expanded=False):
+        st.text(_print_adf("log(VIX)", adf_vix, adf_alpha))
+        st.text(_print_adf("log(Realized Vol)", adf_rv, adf_alpha))
+    # ---------- Section 3: AR(1) & Half-Lives ----------
+    st.header("AR(1) Mean-Reversion & Shock Half-Lives")
     with st.expander("Methodology", expanded=False):
+        st.write("Fit AR(1):")
+        st.latex(r"y_t = c + \phi y_{t-1} + \varepsilon_t")
+        st.write("Half-life (days) of a one-off shock:")
+        st.latex(r"\mathrm{HL} = -\frac{\ln 2}{\ln \phi} \quad \text{(valid if } 0<\phi<1\text{)}")
+        st.write("Interpretation: smaller HL ⇒ faster mean-reversion.")
+    def estimate_ar1(series):
         y = series.dropna()
         y_lag = y.shift(1).dropna()
         y = y.loc[y_lag.index]
         X = sm.add_constant(y_lag)
         res = sm.OLS(y, X).fit()
+        return float(res.params['const']), float(res.params[1])
+    c_vix, phi_vix = estimate_ar1(np.log(vix))
+    c_rv,  phi_rv  = estimate_ar1(np.log(rv))
+    # Half-lives (guard domain)
+    hl_vix = (-np.log(2) / np.log(phi_vix)) if (phi_vix > 0 and phi_vix != 1) else np.nan
+    hl_rv  = (-np.log(2) / np.log(phi_rv))  if (phi_rv > 0 and phi_rv  != 1) else np.nan
+    # Scatter & regression lines
+    fig3 = make_subplots(
+        rows=1, cols=2, subplot_titles=(f"AR(1) on log(VIX)\nφ={phi_vix:.3f}, HL={hl_vix:.1f}d",
+                                        f"AR(1) on log(Realized Vol)\nφ={phi_rv:.3f}, HL={hl_rv:.1f}d")
+    )
+    # VIX panel
+    y  = np.log(vix).dropna()
+    yl = y.shift(1).dropna()
+    y  = y.loc[yl.index]
+    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
+    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=1)
+    fig3.add_trace(go.Scatter(x=x_line, y=c_vix + phi_vix * x_line, name=f"Fit: y={phi_vix:.2f}·x+{c_vix:.2f}", line=dict(color="cyan")), row=1, col=1)
+    fig3.update_xaxes(title_text="log(VIX) lagged", row=1, col=1)
+    fig3.update_yaxes(title_text="log(VIX)", row=1, col=1)
+    # RV panel
+    y  = np.log(rv).dropna()
+    yl = y.shift(1).dropna()
+    y  = y.loc[yl.index]
+    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
+    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=2)
+    fig3.add_trace(go.Scatter(x=x_line, y=c_rv + phi_rv * x_line, name=f"Fit: y={phi_rv:.2f}·x+{c_rv:.2f}", line=dict(color="magenta")), row=1, col=2)
+    fig3.update_xaxes(title_text="log(RV) lagged", row=1, col=2)
+    fig3.update_yaxes(title_text="log(RV)", row=1, col=2)
+    fig3.update_layout(
+        template="plotly_dark",
+        height=450,
+        margin=dict(l=50, r=20, t=80, b=40),
+        font=dict(color="white")
     )
+    if hasattr(fig3.layout, "annotations"):
+        for a in fig3.layout.annotations:
+            a.font = dict(color="white", size=12)
+    st.plotly_chart(fig3, use_container_width=True)
+    with st.expander("AR(1) Results (raw-style text)", expanded=False):
+        buf = io.StringIO()
+        print("AR(1) on log(VIX):", file=buf)
+        print(f"  φ         = {phi_vix:.4f}", file=buf)
+        print(f"  Half-life = {hl_vix:.1f} days", file=buf)
+        print(f"  → A one-time shock to log(VIX) decays by half after about {hl_vix:.1f} trading days.", file=buf)
+        print("  → |φ| < 1: log(VIX) is stationary (mean-reverting)\n" if abs(phi_vix) < 1 else
+              "  → |φ| ≥ 1: log(VIX) is non-stationary (no mean-reversion)\n", file=buf)
+        print("AR(1) on log(Realized Vol):", file=buf)
+        print(f"  φ         = {phi_rv:.4f}", file=buf)
+        print(f"  Half-life = {hl_rv:.1f} days", file=buf)
+        print(f"  → A one-time shock to log(Realized Vol) decays by half after about {hl_rv:.1f} trading days.", file=buf)
+        print("  → |φ| < 1: log(Realized Vol) is stationary (mean-reverting)\n" if abs(phi_rv) < 1 else
+              "  → |φ| ≥ 1: log(Realized Vol) is non-stationary (no mean-reversion)\n", file=buf)
+        st.text(buf.getvalue())
+    # ---------- Section 4: OU Parameters & Rolling Half-Lives ----------
+    st.header("Ornstein–Uhlenbeck (OU) & Rolling Half-Life")
     with st.expander("Methodology", expanded=False):
+        st.write("Discrete OU approximation on log-volatility:")
+        st.latex(r"x_t - x_{t-1} = a + b\,x_{t-1} + \varepsilon_t \quad \Rightarrow \quad \kappa = -b,\ \ \mu = \frac{a}{\kappa}")
+        st.write("Half-life (days):")
+        st.latex(r"\mathrm{HL} = \frac{\ln 2}{\kappa} \quad (\kappa>0)")
+        st.write("We estimate OU on rolling windows to see how mean-reversion speed changes over time.")
+    def _ou_params(x: pd.Series):
         x = x.dropna()
         dx = x.diff().dropna()
         x_lag = x.shift(1).loc[dx.index]
         X = sm.add_constant(x_lag)
         res = sm.OLS(dx, X).fit()
+        a = float(res.params['const'])
         b = float(res.params[x_lag.name])
         kappa = -b
+        mu = (a / kappa) if kappa != 0 else np.nan
         sigma = float(res.resid.std())
+        hl = (np.log(2) / kappa) if kappa > 0 else np.nan
         return kappa, mu, sigma, hl
+    κ_vix, μ_vix, σ_vix, hl_vix_ou = _ou_params(np.log(vix))
+    κ_rv,  μ_rv,  σ_rv,  hl_rv_ou  = _ou_params(np.log(rv))
     # Rolling half-life series
+    def _rolling_hl(x: pd.Series, window: int):
+        xs = x.dropna()
+        hl = []
+        idx = []
+        for i in range(window, len(xs)):
+            seg = xs.iloc[i-window:i]
+            k, _, _, hl_i = _ou_params(seg)
+            hl.append(hl_i)
+            idx.append(seg.index[-1])
+        return pd.Series(hl, index=pd.Index(idx, name="Date"))
+    hl_vix_ts = _rolling_hl(np.log(vix), int(ou_roll_window))
+    hl_rv_ts  = _rolling_hl(np.log(rv),  int(ou_roll_window))
+    med_vix = float(hl_vix_ts.median()) if hl_vix_ts.notna().any() else np.nan
+    med_rv  = float(hl_rv_ts.median())  if hl_rv_ts.notna().any()  else np.nan
+    fig4 = go.Figure()
+    fig4.add_trace(go.Scatter(x=hl_vix_ts.index, y=hl_vix_ts, name="HL log(VIX)", line=dict(color="cyan", width=1)))
+    fig4.add_trace(go.Scatter(x=hl_rv_ts.index,  y=hl_rv_ts,  name="HL log(RV)",  line=dict(color="magenta", width=1)))
+    if np.isfinite(med_vix):
+        fig4.add_hline(y=med_vix, line_dash="dash", line_color="cyan", opacity=0.6)
+    if np.isfinite(med_rv):
+        fig4.add_hline(y=med_rv, line_dash="dash", line_color="magenta", opacity=0.6)
     fig4.update_yaxes(title_text="Half-life (days)")
+    fig4.update_xaxes(
+        tickformatstops=_tickformatstops_monthy(),
+        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+        showline=True, linecolor="rgba(255,255,255,0.4)"
+    )
+    fig4.update_layout(
+        template="plotly_dark",
+        height=450,
+        margin=dict(l=60, r=20, t=60, b=40),
+        font=dict(color="white")
+    )
     st.plotly_chart(fig4, use_container_width=True)
+    with st.expander("OU Results (raw-style text)", expanded=False):
+        buf = io.StringIO()
+        print("OU fit on log(VIX):", file=buf)
+        print(f"  κ         = {κ_vix:.4f}", file=buf)
+        print(f"  μ         = {μ_vix:.4f}", file=buf)
+        print(f"  σ         = {σ_vix:.4f}", file=buf)
+        print(f"  Half-life = {hl_vix_ou:.1f} days", file=buf)
+        if κ_vix > 0:
+            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
+            print(f"  → A shock decays by half in {hl_vix_ou:.1f} trading days.\n", file=buf)
+        else:
+            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)
+        print("OU fit on log(Realized Vol):", file=buf)
+        print(f"  κ         = {κ_rv:.4f}", file=buf)
+        print(f"  μ         = {μ_rv:.4f}", file=buf)
+        print(f"  σ         = {σ_rv:.4f}", file=buf)
+        print(f"  Half-life = {hl_rv_ou:.1f} days", file=buf)
+        if κ_rv > 0:
+            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
+            print(f"  → A shock decays by half in {hl_rv_ou:.1f} trading days.\n", file=buf)
+        else:
+            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)
+        # Simple interpretation of rolling HLs
+        print("Median OU half-life over history:", file=buf)
+        print(f"  log(VIX)          = {med_vix:.1f} days", file=buf)
+        print(f"  log(Realized Vol) = {med_rv:.1f} days", file=buf)
+        if np.isfinite(med_vix) and np.isfinite(med_rv):
+            if med_vix < med_rv:
+                print("  → On average, log(VIX) mean-reverts faster than log(Realized Vol).\n", file=buf)
+            else:
+                print("  → On average, log(Realized Vol) mean-reverts faster than log(VIX).\n", file=buf)
+        st.text(buf.getvalue())
+    # ---------- Section 5: Two-State Markov Regimes ----------
+    if run_ms:
+        st.header("Two-State Markov Regime Model (log Realized Vol)")
+        with st.expander("Methodology", expanded=False):
+            st.write("We fit a **two-regime Markov switching** model on log(Realized Vol):")
+            st.latex(r"y_t = c_{s_t} + \varepsilon_{t}, \quad \varepsilon_t \sim \mathcal{N}(0,\sigma^2_{s_t}), \quad s_t \in \{0,1\}")
+            st.write("The model estimates transition probabilities between regimes and smoothed probabilities over time.")
+            st.latex(r"P = \begin{pmatrix}p_{00} & p_{01}\\ p_{10} & p_{11}\end{pmatrix}, \quad \mathbb{E}[\text{spell length in } j] = \frac{1}{1-p_{jj}}")
+            st.write("Interpretation: high-vol regime persistence ⇒ longer stressful periods; a rising probability can warn of transitions.")
+        series = np.log(rv).dropna()
+        if len(series) < 300:
+            st.warning("Not enough history to fit a stable Markov model. Increase the date range.")
+        else:
+            ms = MarkovRegression(series, k_regimes=2, trend='c', switching_variance=True)
+            res = ms.fit(disp=False)
+            p = res.smoothed_marginal_probabilities  # DataFrame with cols [0,1]
+            # Transition matrix
+            T = res.model.regime_transition_matrix(res.params).squeeze()
+            p00, p01 = float(T[0,0]), float(T[0,1])
+            p10, p11 = float(T[1,0]), float(T[1,1])
+            exp_len_0 = 1.0 / (1.0 - p00) if p00 < 1 else np.inf
+            exp_len_1 = 1.0 / (1.0 - p11) if p11 < 1 else np.inf
+            # Which regime is "high vol"?
+            mean0 = float((series * p[0]).sum() / p[0].sum())
+            mean1 = float((series * p[1]).sum() / p[1].sum())
+            high = 1 if mean1 > mean0 else 0
+            p_high = p[high]
+            # Plot: top series with shading; bottom probability
+            fig5 = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
+                                 subplot_titles=("log(Realized Vol) with High-Vol Regime Shading",
+                                                 f"Smoothed Probability of High-Vol Regime (Regime {high})"))
+            # Top line
+            fig5.add_trace(go.Scatter(x=series.index, y=series, name="log(RV)", line=dict(color="white", width=1)), row=1, col=1)
+            # Shading spans where p_high>0.5
+            mask = (p_high > 0.5)
+            grp = (mask != mask.shift()).cumsum()
+            for _, span in mask[mask].groupby(grp):
+                x0 = span.index[0]; x1 = span.index[-1]
+                fig5.add_vrect(x0=x0, x1=x1, line_width=0, fillcolor="red", opacity=0.2, row=1, col=1)
+            # Bottom probability
+            fig5.add_trace(go.Scatter(x=p_high.index, y=p_high, name=f"P(Regime {high})", line=dict(color="magenta", width=1)), row=2, col=1)
+            fig5.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)
+            fig5.update_yaxes(title_text="log(RV)", row=1, col=1)
+            fig5.update_yaxes(title_text="Probability", row=2, col=1)
+            fig5.update_xaxes(
+                tickformatstops=_tickformatstops_monthy(),
+                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+                showline=True, linecolor="rgba(255,255,255,0.4)"
+            )
+            fig5.update_yaxes(
+                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
+                showline=True, linecolor="rgba(255,255,255,0.4)"
+            )
+            fig5.update_layout(
+                template="plotly_dark",
+                height=600,
+                margin=dict(l=60, r=20, t=60, b=40),
+                font=dict(color="white")
+            )
+            if hasattr(fig5.layout, "annotations"):
+                for a in fig5.layout.annotations:
+                    a.font = dict(color="white", size=12)
+            st.plotly_chart(fig5, use_container_width=True)
+            with st.expander("Markov Model Results (raw-style text)", expanded=False):
+                buf = io.StringIO()
+                print("\nEstimated transition probabilities (rows = to, cols = from)", file=buf)
+                print("          from Reg-0   from Reg-1", file=buf)
+                print(f"to Reg-0   {p00:.4f}       {p10:.4f}", file=buf)
+                print(f"to Reg-1   {p01:.4f}       {p11:.4f}", file=buf)
+                print("\nInterpretation:", file=buf)
+                print(f"• Low-vol regime (Reg-0) persistence = {p00:.2%}. Avg spell ≈ {exp_len_0:.1f} trading days.", file=buf)
+                print(f"• High-vol regime (Reg-1) persistence = {p11:.2%}. Avg spell ≈ {exp_len_1:.1f} trading days.", file=buf)
+                print(f"• Chance of jumping LOW → HIGH next day = {p01:.2%}.", file=buf)
+                print(f"• Chance of jumping HIGH → LOW next day = {p10:.2%}.\n", file=buf)
+                st.text(buf.getvalue())
+    st.success("Analysis complete.")