# Source: Hugging Face Spaces — QuantumLearner / Space75 (status: Sleeping; file size: 26,705 bytes)

# app.py — Volatility Mean-Reversion (VIX vs Realized Vol)
# -----------------------------------------------------------------------------
# Requirements:
#   pip install streamlit yfinance statsmodels plotly numpy pandas
# -----------------------------------------------------------------------------

import io
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

# ----------------------------- Page config & header -----------------------------
# Page setup plus the sidebar widgets. Every value collected here is read by the
# analysis pipeline further down, which only runs when `run_btn` is truthy.
st.set_page_config(page_title="Volatility Mean-Reversion", layout="wide")
st.title("Volatility Mean-Reversion")

st.write(
    "Compare implied volatility (VIX) with realized SPX volatility, test stationarity, "
    "estimate mean-reversion speed and half-lives (AR(1) & OU), and detect high/low "
    "volatility regimes via a two-state Markov model."
)

# ----------------------------- Sidebar controls -----------------------------
with st.sidebar:
    st.header("Controls")

    with st.expander("Data Window", expanded=False):
        default_start = datetime(2015, 1, 1).date()
        # Default end is tomorrow's date; per the help text below this is meant
        # to make the latest close fall inside the requested window.
        default_end = (datetime.today().date() + timedelta(days=1))
        start_date = st.date_input(
            "Start date",
            value=default_start,
            min_value=datetime(2000, 1, 1).date(),
            max_value=default_end,
            help="Earlier start = more history. Later start = faster."
        )
        # NOTE(review): the end date is bounded below by `default_start`, not by
        # the chosen `start_date`, so a start later than the end can be selected.
        end_date = st.date_input(
            "End date",
            value=default_end,
            min_value=default_start,
            max_value=default_end,
            help="Set to today+1 (default) to include the latest close."
        )
        # Window length (in rows) of the rolling std used for realized volatility.
        rv_window = st.number_input(
            "Realized-vol window (days)",
            value=21, min_value=5, max_value=126, step=1,
            help="Rolling window for realized volatility (log returns)."
        )

    with st.expander("Scaling (VIX vs RV)", expanded=False):
        scale_mode = st.selectbox(
            "Scaling method",
            options=["Auto (match means)", "Manual"],
            help="Auto scales realized vol to VIX by matching means; Manual uses your factor."
        )
        # The widget is disabled unless "Manual" is selected; the pipeline only
        # reads `scale_factor` in the non-Auto branch.
        scale_factor = st.number_input(
            "Manual scale factor",
            value=1.0, step=0.1, format="%.3f",
            help="Only used when 'Manual' is selected.",
            disabled=(scale_mode != "Manual")
        )

    with st.expander("Rolling & ADF", expanded=False):
        # Window for the rolling mean/std overlays on the log series plots.
        roll_win = st.number_input(
            "Rolling (days) for mean/std displays",
            value=252, min_value=60, max_value=756, step=10,
            help="Used to plot rolling mean and standard deviation of log series."
        )
        # index=1 makes 0.05 the default significance level.
        adf_alpha = st.selectbox(
            "ADF significance level",
            options=[0.10, 0.05, 0.01],
            index=1,
            help="p-value threshold for rejecting unit root (stationarity)."
        )

    with st.expander("OU & Half-Life", expanded=False):
        # Number of observations per window in the rolling OU half-life fit.
        ou_roll_window = st.number_input(
            "OU rolling window (days)",
            value=252, min_value=126, max_value=756, step=10,
            help="Window for rolling OU half-life estimates."
        )

    with st.expander("Markov Regime Model", expanded=False):
        # Toggles the whole two-state Markov section of the pipeline.
        run_ms = st.checkbox(
            "Run two-state Markov switching on log(Realized Vol)",
            value=True,
            help="Fits a 2-regime model with switching variance and shows shading."
        )

    # The analysis pipeline below is gated on this button's return value.
    run_btn = st.button("Run Analysis", type="primary")

# ----------------------------- Data fetch (cached) -----------------------------
@st.cache_data(show_spinner=False)
def fetch_yf_close(tickers: list[str], start: str, end: str) -> pd.DataFrame:
    """
    Download Yahoo Finance 'Close' prices ONLY (avoids 'Adj Close' confusion).

    Parameters
    ----------
    tickers : list[str]
        Yahoo symbols to download, e.g. ['^VIX', '^GSPC'].
    start, end : str
        Date strings passed straight through to ``yf.download``.

    Returns
    -------
    pd.DataFrame
        Close prices sorted by index and forward-filled. Columns '^VIX' and
        '^GSPC' are renamed to 'VIX' and 'SPX'; when either is requested or
        present, only those columns are kept. An empty DataFrame is returned
        when the download yields no 'Close' data, so callers can test
        ``.empty`` instead of handling a KeyError.
    """
    data = yf.download(tickers, start=start, end=end, progress=False, auto_adjust=False)

    # A failed download may come back empty or without a 'Close' column at all.
    if data is None or data.empty:
        return pd.DataFrame()

    if isinstance(data.columns, pd.MultiIndex):
        if 'Close' not in data.columns.get_level_values(0):
            return pd.DataFrame()
        out = data['Close'].copy()  # keep only Close; columns become the tickers
    else:
        if 'Close' not in data.columns:
            return pd.DataFrame()
        out = data[['Close']].copy()
        # Flat columns carry no ticker label, so name the column after the
        # first requested ticker.
        col_name = tickers[0] if tickers else 'Close'
        out = out.rename(columns={'Close': col_name})

    out = out.rename(columns={'^VIX': 'VIX', '^GSPC': 'SPX'})

    # Restrict to the VIX/SPX columns whenever they were requested or are present.
    wanted = [
        label
        for symbol, label in (('^VIX', 'VIX'), ('^GSPC', 'SPX'))
        if symbol in tickers or label in out.columns
    ]
    if wanted:
        out = out[[c for c in wanted if c in out.columns]]
    return out.sort_index().ffill()

def _tickformatstops_monthy():
    # Month-aware tick formats that refine as you zoom
    return [
        dict(dtickrange=[None, "M1"],  value="%b %Y"),  # < 1M step
        dict(dtickrange=["M1", "M12"], value="%b %Y"),  # 1M..12M
        dict(dtickrange=["M12", None], value="%Y")      # >= yearly
    ]

# ----------------------------- Run pipeline -----------------------------
if run_btn:
    # The sidebar bounds the end date by the default start, not by the chosen
    # start, so an inverted or zero-length window can be selected. Reject it
    # here with a clear message instead of sending it to the downloader.
    if start_date >= end_date:
        st.error("Start date must be earlier than end date.")
        st.stop()

    # yfinance is given plain ISO date strings (also the cache key of fetch_yf_close).
    start_str = pd.to_datetime(start_date).strftime("%Y-%m-%d")
    end_str   = pd.to_datetime(end_date).strftime("%Y-%m-%d")

    with st.spinner("Downloading VIX & SPX…"):
        px = fetch_yf_close(['^VIX', '^GSPC'], start_str, end_str)

    # Both series are required by every section below; stop early otherwise.
    if px.empty or not {'VIX', 'SPX'}.issubset(px.columns):
        st.error("Could not fetch both VIX and SPX 'Close' series. Try a different date range.")
        st.stop()

    vix = px['VIX'].copy()
    spx = px['SPX'].copy()

    # ---------- Section 1: Implied vs Realized Volatility ----------
    # Computes rolling realized volatility from SPX log returns, scales it to
    # VIX, and draws a two-row figure: both series on top, their gap below.
    st.header("Implied vs Realized Volatility")
    with st.expander("Methodology", expanded=False):
        st.write("We compare **implied volatility (VIX)** to **realized SPX volatility** over a rolling window.")
        st.write("Log returns and realized volatility:")
        st.latex(r"r_t = \ln P_t - \ln P_{t-1}, \qquad \mathrm{RV}_{n}(t) = \sqrt{252}\ \mathrm{stdev}\big(r_{t-n+1},\ldots,r_t\big)")
        st.write("Scaling (to compare levels):")
        st.latex(r"s = \frac{\overline{\mathrm{VIX}}}{\overline{\mathrm{RV}_n}} \quad \Rightarrow \quad \mathrm{RV}^{\mathrm{scaled}}_n = s\cdot \mathrm{RV}_n")
        st.write("Gap:")
        st.latex(r"\Delta_t = \mathrm{VIX}_t - \mathrm{RV}^{\mathrm{scaled}}_{n}(t)")
        st.write(
            "Interpretation: VIX > scaled RV suggests an implied risk premium; VIX < scaled RV suggests realized "
            "volatility is running ‘hot’ relative to implied."
        )

    # Realized volatility
    # Rolling std of daily log returns, annualised with sqrt(252); rows where
    # the rolling std is NaN are dropped.
    log_ret = np.log(spx).diff()
    rv = log_ret.rolling(int(rv_window)).std() * np.sqrt(252)
    rv = rv.dropna()

    # Align VIX and compute scaling
    # VIX is put on RV's index so the two series line up row-for-row.
    vix = vix.reindex(rv.index).ffill()
    vix_mean = float(vix.mean()) if len(vix) else np.nan
    rv_mean  = float(rv.mean())  if len(rv)  else np.nan
    # Auto mode: ratio of sample means, falling back to 1.0 when either mean is
    # non-finite or the RV mean is zero. Manual mode: the sidebar factor.
    if scale_mode.startswith("Auto"):
        sf = (vix_mean / rv_mean) if (np.isfinite(vix_mean) and np.isfinite(rv_mean) and rv_mean != 0) else 1.0
    else:
        sf = float(scale_factor)

    rv_scaled = rv * sf
    diff = vix - rv_scaled

    # Plot: VIX vs RV (row 1), Gap (row 2)
    fig1 = make_subplots(
        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05,
        specs=[[{"secondary_y": True}], [{}]],
        subplot_titles=("VIX vs Realized Volatility", "VIX − Scaled Realized Volatility")
    )
    # Row 1
    # NOTE: the top panel plots the unscaled `rv` on a secondary y-axis;
    # `rv_scaled` is only used for the gap series in row 2.
    fig1.add_trace(go.Scatter(x=vix.index, y=vix, name="VIX", line=dict(width=1, color="cyan")), row=1, col=1, secondary_y=False)
    fig1.add_trace(go.Scatter(x=rv.index,  y=rv,  name=f"Realized Vol ({int(rv_window)}d)", line=dict(width=1, color="magenta")), row=1, col=1, secondary_y=True)
    fig1.update_yaxes(title_text="VIX", row=1, col=1, secondary_y=False)
    fig1.update_yaxes(title_text="Realized Vol", row=1, col=1, secondary_y=True)

    # Row 2
    fig1.add_trace(go.Scatter(x=diff.index, y=diff, name="VIX − Scaled RV", line=dict(width=1, color="white")), row=2, col=1)
    fig1.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
    fig1.update_yaxes(title_text="Difference", row=2, col=1)

    # Style
    fig1.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig1.update_yaxes(
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig1.update_layout(
        template="plotly_dark",
        height=650,
        margin=dict(l=60, r=20, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
        font=dict(color="white"),
        hovermode="x unified"
    )
    # Ensure white subplot titles
    # (each annotation in the layout gets its font overridden)
    if hasattr(fig1.layout, "annotations"):
        for a in fig1.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig1, use_container_width=True)

    # ---------- Section 2: Stationarity (ADF) & Rolling Diagnostics ----------
    # Runs ADF tests on log(VIX) and log(RV) and plots each log series with its
    # rolling mean and rolling std.
    st.header("Stationarity & Rolling Diagnostics")
    with st.expander("Methodology", expanded=False):
        st.write("Test whether log-volatility is stationary (mean-reverting) using the ADF test.")
        st.latex(r"\text{ADF null: unit root (non-stationary)}\quad\text{vs}\quad \text{stationary (mean-reverting)}")
        st.write("Rolling mean and std provide a visual check of stability over time.")

    # log series
    log_vix      = np.log(vix)
    log_real_vol = np.log(rv)

    # ADF tests
    # Lag length is selected via autolag='AIC'; the result tuples are formatted
    # for display further down.
    adf_vix = adfuller(log_vix.dropna(), autolag='AIC')
    adf_rv  = adfuller(log_real_vol.dropna(), autolag='AIC')

    # Rolling plots (two rows)
    fig2 = make_subplots(
        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
        subplot_titles=(f"log(VIX) with {int(roll_win)}d Rolling Mean & Std",
                        f"log(Realized Vol) with {int(roll_win)}d Rolling Mean & Std")
    )
    # log(VIX)
    # Level, rolling mean and rolling std share the same y-axis in each row.
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix, name="log(VIX)", line=dict(width=1, color="#00d2ff")), row=1, col=1)
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=1, col=1)
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=1, col=1)

    # log(RV)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol, name="log(Realized Vol)", line=dict(width=1, color="#ff6ad5")), row=2, col=1)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=2, col=1)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=2, col=1)

    fig2.update_yaxes(title_text="Level", row=1, col=1)
    fig2.update_yaxes(title_text="Level", row=2, col=1)

    # Same axis/grid styling as the other figures in this script.
    fig2.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig2.update_yaxes(
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig2.update_layout(
        template="plotly_dark",
        height=650,
        margin=dict(l=60, r=20, t=60, b=40),
        font=dict(color="white"),
        hovermode="x unified"
    )
    # Force white subplot titles.
    if hasattr(fig2.layout, "annotations"):
        for a in fig2.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig2, use_container_width=True)

    # ADF interpretation (match raw narrative style)
    def _print_adf(name, adf_res, alpha):
        buf = io.StringIO()
        stat, pvalue, usedlag, nobs, crit_vals, icbest = adf_res
        print(f"ADF Test on {name}:", file=buf)
        print(f"  Statistic : {stat:.4f}", file=buf)
        print(f"  p-value   : {pvalue:.4f}", file=buf)
        print("  Critical Values:", file=buf)
        for lvl, val in crit_vals.items():
            print(f"    {lvl}: {val:.4f}", file=buf)
        if (stat < crit_vals['5%']) and (pvalue < alpha):
            print("  → Reject H₀: series is stationary (mean-reverting)\n", file=buf)
        else:
            print("  → Fail to reject H₀: series likely has a unit root (no clear mean-reversion)\n", file=buf)
        return buf.getvalue()

    # Show the formatted ADF reports for both log series, using the
    # significance level chosen in the sidebar.
    with st.expander("ADF Results & Interpretation", expanded=False):
        st.text(_print_adf("log(VIX)", adf_vix, adf_alpha))
        st.text(_print_adf("log(Realized Vol)", adf_rv, adf_alpha))

    # ---------- Section 3: AR(1) & Half-Lives ----------
    # Header and methodology text only; the estimation follows below.
    st.header("AR(1) Mean-Reversion & Shock Half-Lives")
    with st.expander("Methodology", expanded=False):
        st.write("Fit AR(1):")
        st.latex(r"y_t = c + \phi y_{t-1} + \varepsilon_t")
        st.write("Half-life (days) of a one-off shock:")
        st.latex(r"\mathrm{HL} = -\frac{\ln 2}{\ln \phi} \quad \text{(valid if } 0<\phi<1\text{)}")
        st.write("Interpretation: smaller HL ⇒ faster mean-reversion.")

    def estimate_ar1(series):
        """Estimate an AR(1) model y_t = c + phi * y_{t-1} + e_t by OLS.

        Parameters
        ----------
        series : pd.Series
            Input series; NaNs are dropped before building the lag.

        Returns
        -------
        tuple[float, float]
            (c, phi): the intercept and the coefficient on the lagged value.
        """
        y = series.dropna()
        y_lag = y.shift(1).dropna()
        # Keep only observations that have a lagged counterpart.
        y = y.loc[y_lag.index]
        X = sm.add_constant(y_lag)
        res = sm.OLS(y, X).fit()
        # `params` is label-indexed ('const', then the regressor's name), so the
        # slope must be read positionally with .iloc; integer keys through
        # plain [] are deprecated for label-indexed Series.
        return float(res.params['const']), float(res.params.iloc[1])

    # AR(1) fits on the log series: intercept c and persistence phi.
    c_vix, phi_vix = estimate_ar1(np.log(vix))
    c_rv,  phi_rv  = estimate_ar1(np.log(rv))

    # Half-lives (guard domain)
    # HL = -ln 2 / ln(phi) is only meaningful for 0 < phi < 1: phi <= 0 has no
    # real logarithm and phi >= 1 would give an infinite or negative "half-life",
    # so those cases are reported as NaN.
    hl_vix = (-np.log(2) / np.log(phi_vix)) if (0 < phi_vix < 1) else np.nan
    hl_rv  = (-np.log(2) / np.log(phi_rv))  if (0 < phi_rv < 1)  else np.nan

    # Scatter & regression lines
    # Plotly renders line breaks in text only from "<br>", not from "\n".
    fig3 = make_subplots(
        rows=1, cols=2, subplot_titles=(f"AR(1) on log(VIX)<br>φ={phi_vix:.3f}, HL={hl_vix:.1f}d",
                                        f"AR(1) on log(Realized Vol)<br>φ={phi_rv:.3f}, HL={hl_rv:.1f}d")
    )

    # VIX panel
    # Rebuild the (lagged, current) pairs for the scatter, aligned the same way
    # as inside estimate_ar1, and overlay the fitted line c + phi * x.
    y  = np.log(vix).dropna()
    yl = y.shift(1).dropna()
    y  = y.loc[yl.index]
    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=1)
    fig3.add_trace(go.Scatter(x=x_line, y=c_vix + phi_vix * x_line, name=f"Fit: y={phi_vix:.2f}·x+{c_vix:.2f}", line=dict(color="cyan")), row=1, col=1)
    fig3.update_xaxes(title_text="log(VIX) lagged", row=1, col=1)
    fig3.update_yaxes(title_text="log(VIX)", row=1, col=1)

    # RV panel
    # Same construction for realized volatility; `y`, `yl` and `x_line` are reused.
    y  = np.log(rv).dropna()
    yl = y.shift(1).dropna()
    y  = y.loc[yl.index]
    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=2)
    fig3.add_trace(go.Scatter(x=x_line, y=c_rv + phi_rv * x_line, name=f"Fit: y={phi_rv:.2f}·x+{c_rv:.2f}", line=dict(color="magenta")), row=1, col=2)
    fig3.update_xaxes(title_text="log(RV) lagged", row=1, col=2)
    fig3.update_yaxes(title_text="log(RV)", row=1, col=2)

    fig3.update_layout(
        template="plotly_dark",
        height=450,
        margin=dict(l=50, r=20, t=80, b=40),
        font=dict(color="white")
    )
    # Force white subplot titles.
    if hasattr(fig3.layout, "annotations"):
        for a in fig3.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig3, use_container_width=True)

    # Plain-text summary. The half-life values may be NaN, in which case the
    # formatted text shows "nan". The verdict line for each series is picked by
    # a conditional expression on |phi| < 1.
    with st.expander("AR(1) Results (raw-style text)", expanded=False):
        buf = io.StringIO()
        print("AR(1) on log(VIX):", file=buf)
        print(f"  φ         = {phi_vix:.4f}", file=buf)
        print(f"  Half-life = {hl_vix:.1f} days", file=buf)
        print(f"  → A one-time shock to log(VIX) decays by half after about {hl_vix:.1f} trading days.", file=buf)
        print("  → |φ| < 1: log(VIX) is stationary (mean-reverting)\n" if abs(phi_vix) < 1 else
              "  → |φ| ≥ 1: log(VIX) is non-stationary (no mean-reversion)\n", file=buf)
        print("AR(1) on log(Realized Vol):", file=buf)
        print(f"  φ         = {phi_rv:.4f}", file=buf)
        print(f"  Half-life = {hl_rv:.1f} days", file=buf)
        print(f"  → A one-time shock to log(Realized Vol) decays by half after about {hl_rv:.1f} trading days.", file=buf)
        print("  → |φ| < 1: log(Realized Vol) is stationary (mean-reverting)\n" if abs(phi_rv) < 1 else
              "  → |φ| ≥ 1: log(Realized Vol) is non-stationary (no mean-reversion)\n", file=buf)
        st.text(buf.getvalue())

    # ---------- Section 4: OU Parameters & Rolling Half-Lives ----------
    # Header and methodology text only. The regression shown here
    # (dx on a constant and lagged x) is what the OU estimation below fits.
    st.header("Ornstein–Uhlenbeck (OU) & Rolling Half-Life")
    with st.expander("Methodology", expanded=False):
        st.write("Discrete OU approximation on log-volatility:")
        st.latex(r"x_t - x_{t-1} = a + b\,x_{t-1} + \varepsilon_t \quad \Rightarrow \quad \kappa = -b,\ \ \mu = \frac{a}{\kappa}")
        st.write("Half-life (days):")
        st.latex(r"\mathrm{HL} = \frac{\ln 2}{\kappa} \quad (\kappa>0)")
        st.write("We estimate OU on rolling windows to see how mean-reversion speed changes over time.")

    def _ou_params(x: pd.Series):
        """Estimate discrete OU parameters by regressing dx on a constant and lagged x.

        Fits ``x_t - x_{t-1} = a + b * x_{t-1} + e_t`` by OLS.

        Parameters
        ----------
        x : pd.Series
            Input series; NaNs are dropped first.

        Returns
        -------
        tuple
            (kappa, mu, sigma, hl) where kappa = -b, mu = a / kappa (NaN when
            kappa is zero), sigma is the standard deviation of the regression
            residuals, and hl = ln(2) / kappa (NaN unless kappa > 0).
        """
        x = x.dropna()
        dx = x.diff().dropna()
        x_lag = x.shift(1).loc[dx.index]
        X = sm.add_constant(x_lag)
        res = sm.OLS(dx, X).fit()
        a = float(res.params['const'])
        # Read the slope by position: looking it up via `x_lag.name` fails for
        # an unnamed Series, whose regressor column is not labelled None.
        b = float(res.params.iloc[1])
        kappa = -b
        mu = (a / kappa) if kappa != 0 else np.nan
        sigma = float(res.resid.std())
        hl = (np.log(2) / kappa) if kappa > 0 else np.nan
        return kappa, mu, sigma, hl

    # Full-sample OU estimates (kappa, mu, sigma, half-life) for each log series;
    # these feed the text report in the "OU Results" expander.
    κ_vix, μ_vix, σ_vix, hl_vix_ou = _ou_params(np.log(vix))
    κ_rv,  μ_rv,  σ_rv,  hl_rv_ou  = _ou_params(np.log(rv))

    # Rolling half-life series
    def _rolling_hl(x: pd.Series, window: int):
        """Compute OU half-lives over rolling windows of ``window`` observations.

        Parameters
        ----------
        x : pd.Series
            Input series; NaNs are dropped first.
        window : int
            Number of consecutive observations per OU fit.

        Returns
        -------
        pd.Series
            Half-life per window, indexed (index name "Date") by the label of
            the window's last observation. Entries are NaN where the fit is
            not mean-reverting. Empty (float dtype) when the series has fewer
            than ``window`` observations.
        """
        xs = x.dropna()
        hl = []
        idx = []
        # `end` is the exclusive stop of each window; running it up to and
        # including len(xs) makes the final window end on the most recent
        # observation instead of stopping one short.
        for end in range(window, len(xs) + 1):
            seg = xs.iloc[end - window:end]
            _, _, _, hl_i = _ou_params(seg)
            hl.append(hl_i)
            idx.append(seg.index[-1])
        return pd.Series(hl, index=pd.Index(idx, name="Date"), dtype=float)

    # Rolling half-life series for both log series, using the sidebar window.
    hl_vix_ts = _rolling_hl(np.log(vix), int(ou_roll_window))
    hl_rv_ts  = _rolling_hl(np.log(rv),  int(ou_roll_window))

    # Medians fall back to NaN when a series has no non-NaN half-life at all.
    med_vix = float(hl_vix_ts.median()) if hl_vix_ts.notna().any() else np.nan
    med_rv  = float(hl_rv_ts.median())  if hl_rv_ts.notna().any()  else np.nan

    fig4 = go.Figure()
    fig4.add_trace(go.Scatter(x=hl_vix_ts.index, y=hl_vix_ts, name="HL log(VIX)", line=dict(color="cyan", width=1)))
    fig4.add_trace(go.Scatter(x=hl_rv_ts.index,  y=hl_rv_ts,  name="HL log(RV)",  line=dict(color="magenta", width=1)))
    # Dashed horizontal lines mark each series' median half-life, when finite.
    if np.isfinite(med_vix):
        fig4.add_hline(y=med_vix, line_dash="dash", line_color="cyan", opacity=0.6)
    if np.isfinite(med_rv):
        fig4.add_hline(y=med_rv, line_dash="dash", line_color="magenta", opacity=0.6)
    fig4.update_yaxes(title_text="Half-life (days)")
    fig4.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig4.update_layout(
        template="plotly_dark",
        height=450,
        margin=dict(l=60, r=20, t=60, b=40),
        font=dict(color="white")
    )
    st.plotly_chart(fig4, use_container_width=True)

    # Plain-text report of the full-sample OU fits followed by a comparison of
    # the median rolling half-lives. NaN values are printed as "nan".
    with st.expander("OU Results (raw-style text)", expanded=False):
        buf = io.StringIO()
        print("OU fit on log(VIX):", file=buf)
        print(f"  κ         = {κ_vix:.4f}", file=buf)
        print(f"  μ         = {μ_vix:.4f}", file=buf)
        print(f"  σ         = {σ_vix:.4f}", file=buf)
        print(f"  Half-life = {hl_vix_ou:.1f} days", file=buf)
        if κ_vix > 0:
            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
            print(f"  → A shock decays by half in {hl_vix_ou:.1f} trading days.\n", file=buf)
        else:
            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)

        print("OU fit on log(Realized Vol):", file=buf)
        print(f"  κ         = {κ_rv:.4f}", file=buf)
        print(f"  μ         = {μ_rv:.4f}", file=buf)
        print(f"  σ         = {σ_rv:.4f}", file=buf)
        print(f"  Half-life = {hl_rv_ou:.1f} days", file=buf)
        if κ_rv > 0:
            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
            print(f"  → A shock decays by half in {hl_rv_ou:.1f} trading days.\n", file=buf)
        else:
            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)

        # Simple interpretation of rolling HLs
        # (the comparison is only printed when both medians are finite)
        print("Median OU half-life over history:", file=buf)
        print(f"  log(VIX)          = {med_vix:.1f} days", file=buf)
        print(f"  log(Realized Vol) = {med_rv:.1f} days", file=buf)
        if np.isfinite(med_vix) and np.isfinite(med_rv):
            if med_vix < med_rv:
                print("  → On average, log(VIX) mean-reverts faster than log(Realized Vol).\n", file=buf)
            else:
                print("  → On average, log(Realized Vol) mean-reverts faster than log(VIX).\n", file=buf)
        st.text(buf.getvalue())

    # ---------- Section 5: Two-State Markov Regimes ----------
    # Fits a two-regime Markov switching model (switching mean and variance) on
    # log realized volatility, shades the periods classified as high-vol, and
    # reports transition probabilities and expected spell lengths.
    if run_ms:
        st.header("Two-State Markov Regime Model (log Realized Vol)")
        with st.expander("Methodology", expanded=False):
            st.write("We fit a **two-regime Markov switching** model on log(Realized Vol):")
            st.latex(r"y_t = c_{s_t} + \varepsilon_{t}, \quad \varepsilon_t \sim \mathcal{N}(0,\sigma^2_{s_t}), \quad s_t \in \{0,1\}")
            st.write("The model estimates transition probabilities between regimes and smoothed probabilities over time.")
            st.latex(r"P = \begin{pmatrix}p_{00} & p_{01}\\ p_{10} & p_{11}\end{pmatrix}, \quad \mathbb{E}[\text{spell length in } j] = \frac{1}{1-p_{jj}}")
            st.write("Interpretation: high-vol regime persistence ⇒ longer stressful periods; a rising probability can warn of transitions.")

        series = np.log(rv).dropna()
        if len(series) < 300:
            st.warning("Not enough history to fit a stable Markov model. Increase the date range.")
        else:
            ms = MarkovRegression(series, k_regimes=2, trend='c', switching_variance=True)
            res = ms.fit(disp=False)
            p = res.smoothed_marginal_probabilities  # DataFrame with cols [0,1]

            # Transition matrix. statsmodels builds it LEFT-stochastic:
            # T[i, j] = P(s_t = i | s_{t-1} = j), i.e. rows are the destination
            # regime, columns the origin regime, and each column sums to one.
            T = res.model.regime_transition_matrix(res.params).squeeze()

            # Which regime is "high vol"? The one with the larger
            # probability-weighted mean of log(RV). Regime numbering is arbitrary,
            # so every label below is derived from `high` / `low`.
            mean0 = float((series * p[0]).sum() / p[0].sum())
            mean1 = float((series * p[1]).sum() / p[1].sum())
            high = 1 if mean1 > mean0 else 0
            low = 1 - high
            p_high = p[high]

            # Persistence and switching probabilities, read as T[to, from].
            stay_low = float(T[low, low])
            stay_high = float(T[high, high])
            low_to_high = float(T[high, low])
            high_to_low = float(T[low, high])
            exp_len_low = 1.0 / (1.0 - stay_low) if stay_low < 1 else np.inf
            exp_len_high = 1.0 / (1.0 - stay_high) if stay_high < 1 else np.inf

            # Plot: top series with shading; bottom probability
            fig5 = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
                                 subplot_titles=("log(Realized Vol) with High-Vol Regime Shading",
                                                 f"Smoothed Probability of High-Vol Regime (Regime {high})"))

            # Top line
            fig5.add_trace(go.Scatter(x=series.index, y=series, name="log(RV)", line=dict(color="white", width=1)), row=1, col=1)

            # Shading spans where p_high>0.5
            # Consecutive rows with the same mask value share a group id, so each
            # contiguous high-vol run becomes one rectangle.
            mask = (p_high > 0.5)
            grp = (mask != mask.shift()).cumsum()
            for _, span in mask[mask].groupby(grp):
                x0 = span.index[0]
                x1 = span.index[-1]
                fig5.add_vrect(x0=x0, x1=x1, line_width=0, fillcolor="red", opacity=0.2, row=1, col=1)

            # Bottom probability
            fig5.add_trace(go.Scatter(x=p_high.index, y=p_high, name=f"P(Regime {high})", line=dict(color="magenta", width=1)), row=2, col=1)
            fig5.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)
            fig5.update_yaxes(title_text="log(RV)", row=1, col=1)
            fig5.update_yaxes(title_text="Probability", row=2, col=1)

            fig5.update_xaxes(
                tickformatstops=_tickformatstops_monthy(),
                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
                showline=True, linecolor="rgba(255,255,255,0.4)"
            )
            fig5.update_yaxes(
                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
                showline=True, linecolor="rgba(255,255,255,0.4)"
            )
            fig5.update_layout(
                template="plotly_dark",
                height=600,
                margin=dict(l=60, r=20, t=60, b=40),
                font=dict(color="white")
            )
            # Force white subplot titles.
            if hasattr(fig5.layout, "annotations"):
                for a in fig5.layout.annotations:
                    a.font = dict(color="white", size=12)
            st.plotly_chart(fig5, use_container_width=True)

            with st.expander("Markov Model Results (raw-style text)", expanded=False):
                buf = io.StringIO()
                # The table prints T as-is: row = destination, column = origin.
                print("\nEstimated transition probabilities (rows = to, cols = from)", file=buf)
                print("          from Reg-0   from Reg-1", file=buf)
                print(f"to Reg-0   {float(T[0, 0]):.4f}       {float(T[0, 1]):.4f}", file=buf)
                print(f"to Reg-1   {float(T[1, 0]):.4f}       {float(T[1, 1]):.4f}", file=buf)
                print("\nInterpretation:", file=buf)
                print(f"• Low-vol regime (Reg-{low}) persistence = {stay_low:.2%}. Avg spell ≈ {exp_len_low:.1f} trading days.", file=buf)
                print(f"• High-vol regime (Reg-{high}) persistence = {stay_high:.2%}. Avg spell ≈ {exp_len_high:.1f} trading days.", file=buf)
                print(f"• Chance of jumping LOW → HIGH next day = {low_to_high:.2%}.", file=buf)
                print(f"• Chance of jumping HIGH → LOW next day = {high_to_low:.2%}.\n", file=buf)
                st.text(buf.getvalue())

    # Last statement of the `if run_btn:` pipeline.
    st.success("Analysis complete.")

# Hide default Streamlit style
# This is at module level, outside the `if run_btn:` block, so the CSS is
# emitted whether or not the analysis was run. It hides the #MainMenu element
# and the footer.
st.markdown(
    """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """,
    unsafe_allow_html=True
)