File size: 26,705 Bytes
e0419d5
 
 
 
 
 
de01d22
 
 
 
 
 
 
e0419d5
 
de01d22
 
 
 
e0419d5
de01d22
 
 
e0419d5
 
 
 
de01d22
 
e0419d5
de01d22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0419d5
de01d22
 
 
e0419d5
 
de01d22
e0419d5
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
de01d22
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
 
e0419d5
 
de01d22
 
e0419d5
 
 
 
 
de01d22
 
 
 
e0419d5
de01d22
 
 
e0419d5
 
de01d22
e0419d5
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
 
de01d22
 
e0419d5
 
de01d22
e0419d5
 
 
de01d22
 
e0419d5
 
 
 
de01d22
e0419d5
 
de01d22
e0419d5
 
de01d22
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
de01d22
 
 
 
e0419d5
de01d22
e0419d5
 
de01d22
e0419d5
de01d22
 
e0419d5
de01d22
 
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
de01d22
e0419d5
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
 
de01d22
e0419d5
de01d22
 
 
 
 
e0419d5
de01d22
e0419d5
 
de01d22
e0419d5
 
 
de01d22
e0419d5
 
 
 
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
 
de01d22
e0419d5
de01d22
 
 
 
 
e0419d5
de01d22
 
e0419d5
de01d22
e0419d5
de01d22
 
e0419d5
 
de01d22
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de01d22
e0419d5
 
 
 
 
 
 
 
 
 
 
de01d22
 
e0419d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c83bcd
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
# app.py — Volatility Mean-Reversion (VIX vs Realized Vol)
# -----------------------------------------------------------------------------
# Requirements:
#   pip install streamlit yfinance statsmodels plotly numpy pandas
# -----------------------------------------------------------------------------

import io
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

# ----------------------------- Page config & header -----------------------------
# Set the browser-tab title and the wide layout, then render the page heading
# followed by a one-paragraph summary of what the app computes.
st.set_page_config(page_title="Volatility Mean-Reversion", layout="wide")
st.title("Volatility Mean-Reversion")

st.write(
    "Compare implied volatility (VIX) with realized SPX volatility, test stationarity, "
    "estimate mean-reversion speed and half-lives (AR(1) & OU), and detect high/low "
    "volatility regimes via a two-state Markov model."
)

# ----------------------------- Sidebar controls -----------------------------
# All user inputs live in the sidebar. Each widget's return value is bound to a
# module-level name that the analysis pipeline (the `if run_btn:` section) reads.
with st.sidebar:
    st.header("Controls")

    with st.expander("Data Window", expanded=False):
        # Default window: 2015-01-01 through tomorrow (today + 1 day).
        default_start = datetime(2015, 1, 1).date()
        default_end = (datetime.today().date() + timedelta(days=1))
        start_date = st.date_input(
            "Start date",
            value=default_start,
            min_value=datetime(2000, 1, 1).date(),
            max_value=default_end,
            help="Earlier start = more history. Later start = faster."
        )
        # NOTE(review): the end date is bounded below by default_start (2015-01-01),
        # not by the chosen start_date, so nothing here enforces start <= end.
        end_date = st.date_input(
            "End date",
            value=default_end,
            min_value=default_start,
            max_value=default_end,
            help="Set to today+1 (default) to include the latest close."
        )
        # Number of trading days in the rolling standard deviation of log returns.
        rv_window = st.number_input(
            "Realized-vol window (days)",
            value=21, min_value=5, max_value=126, step=1,
            help="Rolling window for realized volatility (log returns)."
        )

    with st.expander("Scaling (VIX vs RV)", expanded=False):
        # The pipeline tests scale_mode.startswith("Auto"); any other choice
        # falls through to the manual factor below.
        scale_mode = st.selectbox(
            "Scaling method",
            options=["Auto (match means)", "Manual"],
            help="Auto scales realized vol to VIX by matching means; Manual uses your factor."
        )
        # The input is greyed out unless "Manual" is selected.
        scale_factor = st.number_input(
            "Manual scale factor",
            value=1.0, step=0.1, format="%.3f",
            help="Only used when 'Manual' is selected.",
            disabled=(scale_mode != "Manual")
        )

    with st.expander("Rolling & ADF", expanded=False):
        # Window for the rolling mean/std overlays in the stationarity plots.
        roll_win = st.number_input(
            "Rolling (days) for mean/std displays",
            value=252, min_value=60, max_value=756, step=10,
            help="Used to plot rolling mean and standard deviation of log series."
        )
        # index=1 makes 0.05 the default significance level.
        adf_alpha = st.selectbox(
            "ADF significance level",
            options=[0.10, 0.05, 0.01],
            index=1,
            help="p-value threshold for rejecting unit root (stationarity)."
        )

    with st.expander("OU & Half-Life", expanded=False):
        # Number of observations per window for the rolling OU half-life series.
        ou_roll_window = st.number_input(
            "OU rolling window (days)",
            value=252, min_value=126, max_value=756, step=10,
            help="Window for rolling OU half-life estimates."
        )

    with st.expander("Markov Regime Model", expanded=False):
        # When unchecked, the Markov regime section of the pipeline is skipped.
        run_ms = st.checkbox(
            "Run two-state Markov switching on log(Realized Vol)",
            value=True,
            help="Fits a 2-regime model with switching variance and shows shading."
        )

    # The whole analysis below only runs on the rerun triggered by this button.
    run_btn = st.button("Run Analysis", type="primary")

# ----------------------------- Data fetch (cached) -----------------------------
@st.cache_data(show_spinner=False)
def fetch_yf_close(tickers: list[str], start: str, end: str) -> pd.DataFrame:
    """
    Download Yahoo Finance Close prices ONLY (avoids 'Adj Close' confusion).

    Parameters
    ----------
    tickers : Yahoo symbols to request, e.g. ['^VIX', '^GSPC'].
    start, end : date strings passed straight through to ``yf.download``.

    Returns
    -------
    DataFrame of Close prices, sorted by date and forward-filled. '^VIX' and
    '^GSPC' are renamed to 'VIX' and 'SPX'; when either was requested or is
    present, only those columns are kept. An empty DataFrame is returned when
    the download yields nothing usable, and a ticker that produced no data at
    all is dropped rather than kept as an all-NaN column, so callers can detect
    a missing series with ``.empty`` or a column-name check.
    """
    data = yf.download(tickers, start=start, end=end, progress=False, auto_adjust=False)

    # A failed download may come back empty or without a 'Close' field; return
    # an empty frame instead of raising KeyError on the lookup below.
    if data is None or data.empty:
        return pd.DataFrame()
    if 'Close' not in data.columns.get_level_values(0):
        return pd.DataFrame()

    if isinstance(data.columns, pd.MultiIndex):
        out = data['Close'].copy()  # keep only Close
    else:
        # Flat columns: a single 'Close' column that gets the ticker's name.
        out = data[['Close']].copy()
        col_name = tickers[0] if tickers else 'Close'
        out = out.rename(columns={'Close': col_name})

    # Drop tickers for which Yahoo returned no observations at all; an all-NaN
    # column would otherwise pass downstream column checks and break the stats.
    out = out.dropna(axis=1, how='all')

    out = out.rename(columns={'^VIX': 'VIX', '^GSPC': 'SPX'})
    keep = []
    if '^VIX' in tickers or 'VIX' in out.columns:
        keep.append('VIX')
    if '^GSPC' in tickers or 'SPX' in out.columns:
        keep.append('SPX')
    if keep:
        out = out[[c for c in keep if c in out.columns]]
    return out.sort_index().ffill()

def _tickformatstops_monthy():
    # Month-aware tick formats that refine as you zoom
    return [
        dict(dtickrange=[None, "M1"],  value="%b %Y"),  # < 1M step
        dict(dtickrange=["M1", "M12"], value="%b %Y"),  # 1M..12M
        dict(dtickrange=["M12", None], value="%Y")      # >= yearly
    ]

# ----------------------------- Run pipeline -----------------------------
if run_btn:
    start_str = pd.to_datetime(start_date).strftime("%Y-%m-%d")
    end_str   = pd.to_datetime(end_date).strftime("%Y-%m-%d")

    with st.spinner("Downloading VIX & SPX…"):
        px = fetch_yf_close(['^VIX', '^GSPC'], start_str, end_str)

    if px.empty or not set(['VIX', 'SPX']).issubset(px.columns):
        st.error("Could not fetch both VIX and SPX 'Close' series. Try a different date range.")
        st.stop()

    vix = px['VIX'].copy()
    spx = px['SPX'].copy()

    # ---------- Section 1: Implied vs Realized Volatility ----------
    st.header("Implied vs Realized Volatility")
    with st.expander("Methodology", expanded=False):
        st.write("We compare **implied volatility (VIX)** to **realized SPX volatility** over a rolling window.")
        st.write("Log returns and realized volatility:")
        st.latex(r"r_t = \ln P_t - \ln P_{t-1}, \qquad \mathrm{RV}_{n}(t) = \sqrt{252}\ \mathrm{stdev}\big(r_{t-n+1},\ldots,r_t\big)")
        st.write("Scaling (to compare levels):")
        st.latex(r"s = \frac{\overline{\mathrm{VIX}}}{\overline{\mathrm{RV}_n}} \quad \Rightarrow \quad \mathrm{RV}^{\mathrm{scaled}}_n = s\cdot \mathrm{RV}_n")
        st.write("Gap:")
        st.latex(r"\Delta_t = \mathrm{VIX}_t - \mathrm{RV}^{\mathrm{scaled}}_{n}(t)")
        st.write(
            "Interpretation: VIX > scaled RV suggests an implied risk premium; VIX < scaled RV suggests realized "
            "volatility is running ‘hot’ relative to implied."
        )

    # Realized volatility
    # Rolling standard deviation of daily log returns over rv_window
    # observations, annualized with sqrt(252). The first rows, where the window
    # is not yet full, are NaN and get dropped.
    log_ret = np.log(spx).diff()
    rv = log_ret.rolling(int(rv_window)).std() * np.sqrt(252)
    rv = rv.dropna()

    # Align VIX and compute scaling
    # VIX is restricted to the dates on which realized vol exists.
    vix = vix.reindex(rv.index).ffill()
    vix_mean = float(vix.mean()) if len(vix) else np.nan
    rv_mean  = float(rv.mean())  if len(rv)  else np.nan
    # Auto mode: sf = mean(VIX) / mean(RV), falling back to 1.0 when either mean
    # is not finite or mean(RV) is zero. Any other mode uses the sidebar factor.
    # NOTE(review): rv is in decimal return units while VIX is presumably quoted
    # in percentage points, so a manual factor of 1.0 leaves the two on very
    # different scales — confirm the intended units.
    if scale_mode.startswith("Auto"):
        sf = (vix_mean / rv_mean) if (np.isfinite(vix_mean) and np.isfinite(rv_mean) and rv_mean != 0) else 1.0
    else:
        sf = float(scale_factor)

    # Gap between implied and (scaled) realized volatility, plotted in row 2.
    rv_scaled = rv * sf
    diff = vix - rv_scaled

    # Plot: VIX vs RV (row 1), Gap (row 2)
    fig1 = make_subplots(
        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05,
        specs=[[{"secondary_y": True}], [{}]],
        subplot_titles=("VIX vs Realized Volatility", "VIX − Scaled Realized Volatility")
    )
    # Row 1
    fig1.add_trace(go.Scatter(x=vix.index, y=vix, name="VIX", line=dict(width=1, color="cyan")), row=1, col=1, secondary_y=False)
    fig1.add_trace(go.Scatter(x=rv.index,  y=rv,  name=f"Realized Vol ({int(rv_window)}d)", line=dict(width=1, color="magenta")), row=1, col=1, secondary_y=True)
    fig1.update_yaxes(title_text="VIX", row=1, col=1, secondary_y=False)
    fig1.update_yaxes(title_text="Realized Vol", row=1, col=1, secondary_y=True)

    # Row 2
    fig1.add_trace(go.Scatter(x=diff.index, y=diff, name="VIX − Scaled RV", line=dict(width=1, color="white")), row=2, col=1)
    fig1.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
    fig1.update_yaxes(title_text="Difference", row=2, col=1)

    # Style
    fig1.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig1.update_yaxes(
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig1.update_layout(
        template="plotly_dark",
        height=650,
        margin=dict(l=60, r=20, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
        font=dict(color="white"),
        hovermode="x unified"
    )
    # Ensure white subplot titles
    if hasattr(fig1.layout, "annotations"):
        for a in fig1.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig1, use_container_width=True)

    # ---------- Section 2: Stationarity (ADF) & Rolling Diagnostics ----------
    st.header("Stationarity & Rolling Diagnostics")
    with st.expander("Methodology", expanded=False):
        st.write("Test whether log-volatility is stationary (mean-reverting) using the ADF test.")
        st.latex(r"\text{ADF null: unit root (non-stationary)}\quad\text{vs}\quad \text{stationary (mean-reverting)}")
        st.write("Rolling mean and std provide a visual check of stability over time.")

    # log series
    log_vix      = np.log(vix)
    log_real_vol = np.log(rv)

    # ADF tests
    adf_vix = adfuller(log_vix.dropna(), autolag='AIC')
    adf_rv  = adfuller(log_real_vol.dropna(), autolag='AIC')

    # Rolling plots (two rows)
    fig2 = make_subplots(
        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
        subplot_titles=(f"log(VIX) with {int(roll_win)}d Rolling Mean & Std",
                        f"log(Realized Vol) with {int(roll_win)}d Rolling Mean & Std")
    )
    # log(VIX)
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix, name="log(VIX)", line=dict(width=1, color="#00d2ff")), row=1, col=1)
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=1, col=1)
    fig2.add_trace(go.Scatter(x=log_vix.index, y=log_vix.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=1, col=1)

    # log(RV)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol, name="log(Realized Vol)", line=dict(width=1, color="#ff6ad5")), row=2, col=1)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).mean(), name="Rolling Mean", line=dict(width=1, dash="dash", color="#aaaaaa")), row=2, col=1)
    fig2.add_trace(go.Scatter(x=log_real_vol.index, y=log_real_vol.rolling(int(roll_win)).std(),  name="Rolling Std",  line=dict(width=1, dash="dot",  color="#888888")), row=2, col=1)

    fig2.update_yaxes(title_text="Level", row=1, col=1)
    fig2.update_yaxes(title_text="Level", row=2, col=1)

    fig2.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig2.update_yaxes(
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig2.update_layout(
        template="plotly_dark",
        height=650,
        margin=dict(l=60, r=20, t=60, b=40),
        font=dict(color="white"),
        hovermode="x unified"
    )
    if hasattr(fig2.layout, "annotations"):
        for a in fig2.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig2, use_container_width=True)

    # ADF interpretation (match raw narrative style)
    def _print_adf(name, adf_res, alpha):
        buf = io.StringIO()
        stat, pvalue, usedlag, nobs, crit_vals, icbest = adf_res
        print(f"ADF Test on {name}:", file=buf)
        print(f"  Statistic : {stat:.4f}", file=buf)
        print(f"  p-value   : {pvalue:.4f}", file=buf)
        print("  Critical Values:", file=buf)
        for lvl, val in crit_vals.items():
            print(f"    {lvl}: {val:.4f}", file=buf)
        if (stat < crit_vals['5%']) and (pvalue < alpha):
            print("  → Reject H₀: series is stationary (mean-reverting)\n", file=buf)
        else:
            print("  → Fail to reject H₀: series likely has a unit root (no clear mean-reversion)\n", file=buf)
        return buf.getvalue()

    with st.expander("ADF Results & Interpretation", expanded=False):
        st.text(_print_adf("log(VIX)", adf_vix, adf_alpha))
        st.text(_print_adf("log(Realized Vol)", adf_rv, adf_alpha))

    # ---------- Section 3: AR(1) & Half-Lives ----------
    st.header("AR(1) Mean-Reversion & Shock Half-Lives")
    with st.expander("Methodology", expanded=False):
        st.write("Fit AR(1):")
        st.latex(r"y_t = c + \phi y_{t-1} + \varepsilon_t")
        st.write("Half-life (days) of a one-off shock:")
        st.latex(r"\mathrm{HL} = -\frac{\ln 2}{\ln \phi} \quad \text{(valid if } 0<\phi<1\text{)}")
        st.write("Interpretation: smaller HL ⇒ faster mean-reversion.")

    def estimate_ar1(series):
        """Fit y_t = c + φ·y_{t-1} + ε_t by OLS and return ``(c, φ)`` as floats.

        Missing values are dropped before lagging, so the "previous" value of an
        observation is the previous non-missing one.
        """
        y = series.dropna()
        y_lag = y.shift(1).dropna()
        y = y.loc[y_lag.index]
        X = sm.add_constant(y_lag)
        res = sm.OLS(y, X).fit()
        # The slope is the second coefficient. Read it by position: params is
        # labelled by column name, and an integer key on a label index relies on
        # pandas' deprecated positional fallback.
        return float(res.params['const']), float(res.params.iloc[1])

    c_vix, phi_vix = estimate_ar1(np.log(vix))
    c_rv,  phi_rv  = estimate_ar1(np.log(rv))

    # Half-lives: HL = -ln 2 / ln φ is only meaningful for 0 < φ < 1.
    # φ ≤ 0 has no real logarithm, φ = 1 divides by zero, and φ > 1 would give
    # a negative "half-life"; all of these are reported as NaN.
    hl_vix = (-np.log(2) / np.log(phi_vix)) if (0 < phi_vix < 1) else np.nan
    hl_rv  = (-np.log(2) / np.log(phi_rv))  if (0 < phi_rv < 1)  else np.nan

    # Scatter & regression lines
    # Subplot titles are Plotly annotation text, where a line break is written
    # as <br>; a literal "\n" is not rendered as a new line.
    fig3 = make_subplots(
        rows=1, cols=2, subplot_titles=(f"AR(1) on log(VIX)<br>φ={phi_vix:.3f}, HL={hl_vix:.1f}d",
                                        f"AR(1) on log(Realized Vol)<br>φ={phi_rv:.3f}, HL={hl_rv:.1f}d")
    )

    # VIX panel
    y  = np.log(vix).dropna()
    yl = y.shift(1).dropna()
    y  = y.loc[yl.index]
    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=1)
    fig3.add_trace(go.Scatter(x=x_line, y=c_vix + phi_vix * x_line, name=f"Fit: y={phi_vix:.2f}·x+{c_vix:.2f}", line=dict(color="cyan")), row=1, col=1)
    fig3.update_xaxes(title_text="log(VIX) lagged", row=1, col=1)
    fig3.update_yaxes(title_text="log(VIX)", row=1, col=1)

    # RV panel
    y  = np.log(rv).dropna()
    yl = y.shift(1).dropna()
    y  = y.loc[yl.index]
    x_line = np.linspace(float(yl.min()), float(yl.max()), 100)
    fig3.add_trace(go.Scatter(x=yl, y=y, mode="markers", marker=dict(size=4, color="white"), name="Data"), row=1, col=2)
    fig3.add_trace(go.Scatter(x=x_line, y=c_rv + phi_rv * x_line, name=f"Fit: y={phi_rv:.2f}·x+{c_rv:.2f}", line=dict(color="magenta")), row=1, col=2)
    fig3.update_xaxes(title_text="log(RV) lagged", row=1, col=2)
    fig3.update_yaxes(title_text="log(RV)", row=1, col=2)

    fig3.update_layout(
        template="plotly_dark",
        height=450,
        margin=dict(l=50, r=20, t=80, b=40),
        font=dict(color="white")
    )
    if hasattr(fig3.layout, "annotations"):
        for a in fig3.layout.annotations:
            a.font = dict(color="white", size=12)
    st.plotly_chart(fig3, use_container_width=True)

    with st.expander("AR(1) Results (raw-style text)", expanded=False):
        buf = io.StringIO()
        print("AR(1) on log(VIX):", file=buf)
        print(f"  φ         = {phi_vix:.4f}", file=buf)
        print(f"  Half-life = {hl_vix:.1f} days", file=buf)
        print(f"  → A one-time shock to log(VIX) decays by half after about {hl_vix:.1f} trading days.", file=buf)
        print("  → |φ| < 1: log(VIX) is stationary (mean-reverting)\n" if abs(phi_vix) < 1 else
              "  → |φ| ≥ 1: log(VIX) is non-stationary (no mean-reversion)\n", file=buf)
        print("AR(1) on log(Realized Vol):", file=buf)
        print(f"  φ         = {phi_rv:.4f}", file=buf)
        print(f"  Half-life = {hl_rv:.1f} days", file=buf)
        print(f"  → A one-time shock to log(Realized Vol) decays by half after about {hl_rv:.1f} trading days.", file=buf)
        print("  → |φ| < 1: log(Realized Vol) is stationary (mean-reverting)\n" if abs(phi_rv) < 1 else
              "  → |φ| ≥ 1: log(Realized Vol) is non-stationary (no mean-reversion)\n", file=buf)
        st.text(buf.getvalue())

    # ---------- Section 4: OU Parameters & Rolling Half-Lives ----------
    st.header("Ornstein–Uhlenbeck (OU) & Rolling Half-Life")
    with st.expander("Methodology", expanded=False):
        st.write("Discrete OU approximation on log-volatility:")
        st.latex(r"x_t - x_{t-1} = a + b\,x_{t-1} + \varepsilon_t \quad \Rightarrow \quad \kappa = -b,\ \ \mu = \frac{a}{\kappa}")
        st.write("Half-life (days):")
        st.latex(r"\mathrm{HL} = \frac{\ln 2}{\kappa} \quad (\kappa>0)")
        st.write("We estimate OU on rolling windows to see how mean-reversion speed changes over time.")

    def _ou_params(x: pd.Series):
        """Estimate discrete OU parameters from Δx_t = a + b·x_{t-1} + ε_t (OLS).

        Returns
        -------
        (kappa, mu, sigma, hl) where
          kappa = -b                      mean-reversion speed per observation,
          mu    = a / kappa               long-run level (NaN if kappa == 0),
          sigma = std of the residuals,
          hl    = ln 2 / kappa            half-life (NaN unless kappa > 0).
        """
        x = x.dropna()
        dx = x.diff().dropna()
        x_lag = x.shift(1).loc[dx.index]
        X = sm.add_constant(x_lag)
        res = sm.OLS(dx, X).fit()
        a = float(res.params['const'])
        # Slope by position rather than by x_lag.name, so an unnamed Series
        # (name None) works as well.
        b = float(res.params.iloc[1])
        kappa = -b
        mu = (a / kappa) if kappa != 0 else np.nan
        sigma = float(res.resid.std())
        hl = (np.log(2) / kappa) if kappa > 0 else np.nan
        return kappa, mu, sigma, hl

    κ_vix, μ_vix, σ_vix, hl_vix_ou = _ou_params(np.log(vix))
    κ_rv,  μ_rv,  σ_rv,  hl_rv_ou  = _ou_params(np.log(rv))

    # Rolling half-life series
    def _rolling_hl(x: pd.Series, window: int):
        """Rolling OU half-life of ``x`` over windows of ``window`` observations.

        Each window of consecutive non-missing values is fitted with
        ``_ou_params``; the resulting half-life is stamped with the date of the
        window's last observation. Returns a float Series indexed by "Date",
        which is empty when fewer than ``window`` observations are available.
        """
        xs = x.dropna()
        half_lives = []
        end_dates = []
        # `stop` is the exclusive end of the slice. It runs up to len(xs)
        # inclusive so that the final window ends on the latest observation.
        for stop in range(window, len(xs) + 1):
            seg = xs.iloc[stop - window:stop]
            _, _, _, hl_i = _ou_params(seg)
            half_lives.append(hl_i)
            end_dates.append(seg.index[-1])
        return pd.Series(half_lives, index=pd.Index(end_dates, name="Date"), dtype=float)

    hl_vix_ts = _rolling_hl(np.log(vix), int(ou_roll_window))
    hl_rv_ts  = _rolling_hl(np.log(rv),  int(ou_roll_window))

    med_vix = float(hl_vix_ts.median()) if hl_vix_ts.notna().any() else np.nan
    med_rv  = float(hl_rv_ts.median())  if hl_rv_ts.notna().any()  else np.nan

    fig4 = go.Figure()
    fig4.add_trace(go.Scatter(x=hl_vix_ts.index, y=hl_vix_ts, name="HL log(VIX)", line=dict(color="cyan", width=1)))
    fig4.add_trace(go.Scatter(x=hl_rv_ts.index,  y=hl_rv_ts,  name="HL log(RV)",  line=dict(color="magenta", width=1)))
    if np.isfinite(med_vix):
        fig4.add_hline(y=med_vix, line_dash="dash", line_color="cyan", opacity=0.6)
    if np.isfinite(med_rv):
        fig4.add_hline(y=med_rv, line_dash="dash", line_color="magenta", opacity=0.6)
    fig4.update_yaxes(title_text="Half-life (days)")
    fig4.update_xaxes(
        tickformatstops=_tickformatstops_monthy(),
        showgrid=True, gridcolor="rgba(160,160,160,0.2)",
        showline=True, linecolor="rgba(255,255,255,0.4)"
    )
    fig4.update_layout(
        template="plotly_dark",
        height=450,
        margin=dict(l=60, r=20, t=60, b=40),
        font=dict(color="white")
    )
    st.plotly_chart(fig4, use_container_width=True)

    with st.expander("OU Results (raw-style text)", expanded=False):
        buf = io.StringIO()
        print("OU fit on log(VIX):", file=buf)
        print(f"  κ         = {κ_vix:.4f}", file=buf)
        print(f"  μ         = {μ_vix:.4f}", file=buf)
        print(f"  σ         = {σ_vix:.4f}", file=buf)
        print(f"  Half-life = {hl_vix_ou:.1f} days", file=buf)
        if κ_vix > 0:
            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
            print(f"  → A shock decays by half in {hl_vix_ou:.1f} trading days.\n", file=buf)
        else:
            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)

        print("OU fit on log(Realized Vol):", file=buf)
        print(f"  κ         = {κ_rv:.4f}", file=buf)
        print(f"  μ         = {μ_rv:.4f}", file=buf)
        print(f"  σ         = {σ_rv:.4f}", file=buf)
        print(f"  Half-life = {hl_rv_ou:.1f} days", file=buf)
        if κ_rv > 0:
            print("  → κ > 0: process is mean-reverting toward μ.", file=buf)
            print(f"  → A shock decays by half in {hl_rv_ou:.1f} trading days.\n", file=buf)
        else:
            print("  → κ ≤ 0: no mean-reversion detected.\n", file=buf)

        # Simple interpretation of rolling HLs
        print("Median OU half-life over history:", file=buf)
        print(f"  log(VIX)          = {med_vix:.1f} days", file=buf)
        print(f"  log(Realized Vol) = {med_rv:.1f} days", file=buf)
        if np.isfinite(med_vix) and np.isfinite(med_rv):
            if med_vix < med_rv:
                print("  → On average, log(VIX) mean-reverts faster than log(Realized Vol).\n", file=buf)
            else:
                print("  → On average, log(Realized Vol) mean-reverts faster than log(VIX).\n", file=buf)
        st.text(buf.getvalue())

    # ---------- Section 5: Two-State Markov Regimes ----------
    # Fit a two-regime Markov switching model (switching mean and variance) to
    # log realized vol, shade the periods classified as high-vol, and report
    # the transition probabilities in terms of the low-/high-vol regimes.
    if run_ms:
        st.header("Two-State Markov Regime Model (log Realized Vol)")
        with st.expander("Methodology", expanded=False):
            st.write("We fit a **two-regime Markov switching** model on log(Realized Vol):")
            st.latex(r"y_t = c_{s_t} + \varepsilon_{t}, \quad \varepsilon_t \sim \mathcal{N}(0,\sigma^2_{s_t}), \quad s_t \in \{0,1\}")
            st.write("The model estimates transition probabilities between regimes and smoothed probabilities over time.")
            st.latex(r"P = \begin{pmatrix}p_{00} & p_{01}\\ p_{10} & p_{11}\end{pmatrix}, \quad \mathbb{E}[\text{spell length in } j] = \frac{1}{1-p_{jj}}")
            st.write("Interpretation: high-vol regime persistence ⇒ longer stressful periods; a rising probability can warn of transitions.")

        series = np.log(rv).dropna()
        if len(series) < 300:
            st.warning("Not enough history to fit a stable Markov model. Increase the date range.")
        else:
            ms = MarkovRegression(series, k_regimes=2, trend='c', switching_variance=True)
            res = ms.fit(disp=False)
            p = res.smoothed_marginal_probabilities  # DataFrame with cols [0,1]

            # Transition matrix. statsmodels returns a LEFT-stochastic matrix:
            # T[i, j] = P(s_t = i | s_{t-1} = j), i.e. rows are the destination
            # regime, columns the origin regime, and each column sums to one.
            T = res.model.regime_transition_matrix(res.params).squeeze()

            # Which regime is "high vol"? Regime numbering is arbitrary, so
            # compare the probability-weighted mean of the series per regime.
            mean0 = float((series * p[0]).sum() / p[0].sum())
            mean1 = float((series * p[1]).sum() / p[1].sum())
            high = 1 if mean1 > mean0 else 0
            low = 1 - high
            p_high = p[high]

            # Probabilities expressed by regime role rather than by index.
            stay_low = float(T[low, low])
            stay_high = float(T[high, high])
            low_to_high = float(T[high, low])   # destination = high, origin = low
            high_to_low = float(T[low, high])   # destination = low, origin = high
            spell_low = 1.0 / (1.0 - stay_low) if stay_low < 1 else np.inf
            spell_high = 1.0 / (1.0 - stay_high) if stay_high < 1 else np.inf

            # Plot: top series with shading; bottom probability
            fig5 = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.06,
                                 subplot_titles=("log(Realized Vol) with High-Vol Regime Shading",
                                                 f"Smoothed Probability of High-Vol Regime (Regime {high})"))

            # Top line
            fig5.add_trace(go.Scatter(x=series.index, y=series, name="log(RV)", line=dict(color="white", width=1)), row=1, col=1)

            # Shading spans where p_high>0.5: consecutive True values share a
            # group id, and each group becomes one shaded rectangle.
            mask = (p_high > 0.5)
            grp = (mask != mask.shift()).cumsum()
            for _, span in mask[mask].groupby(grp):
                x0 = span.index[0]
                x1 = span.index[-1]
                fig5.add_vrect(x0=x0, x1=x1, line_width=0, fillcolor="red", opacity=0.2, row=1, col=1)

            # Bottom probability
            fig5.add_trace(go.Scatter(x=p_high.index, y=p_high, name=f"P(Regime {high})", line=dict(color="magenta", width=1)), row=2, col=1)
            fig5.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)
            fig5.update_yaxes(title_text="log(RV)", row=1, col=1)
            fig5.update_yaxes(title_text="Probability", row=2, col=1)

            fig5.update_xaxes(
                tickformatstops=_tickformatstops_monthy(),
                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
                showline=True, linecolor="rgba(255,255,255,0.4)"
            )
            fig5.update_yaxes(
                showgrid=True, gridcolor="rgba(160,160,160,0.2)",
                showline=True, linecolor="rgba(255,255,255,0.4)"
            )
            fig5.update_layout(
                template="plotly_dark",
                height=600,
                margin=dict(l=60, r=20, t=60, b=40),
                font=dict(color="white")
            )
            # Ensure white subplot titles
            if hasattr(fig5.layout, "annotations"):
                for a in fig5.layout.annotations:
                    a.font = dict(color="white", size=12)
            st.plotly_chart(fig5, use_container_width=True)

            with st.expander("Markov Model Results (raw-style text)", expanded=False):
                buf = io.StringIO()
                # The table prints T as-is: row i / column j = P(from j to i).
                print("\nEstimated transition probabilities (rows = to, cols = from)", file=buf)
                print("          from Reg-0   from Reg-1", file=buf)
                print(f"to Reg-0   {float(T[0, 0]):.4f}       {float(T[0, 1]):.4f}", file=buf)
                print(f"to Reg-1   {float(T[1, 0]):.4f}       {float(T[1, 1]):.4f}", file=buf)
                print("\nInterpretation:", file=buf)
                print(f"• Low-vol regime (Reg-{low}) persistence = {stay_low:.2%}. Avg spell ≈ {spell_low:.1f} trading days.", file=buf)
                print(f"• High-vol regime (Reg-{high}) persistence = {stay_high:.2%}. Avg spell ≈ {spell_high:.1f} trading days.", file=buf)
                print(f"• Chance of jumping LOW → HIGH next day = {low_to_high:.2%}.", file=buf)
                print(f"• Chance of jumping HIGH → LOW next day = {high_to_low:.2%}.\n", file=buf)
                st.text(buf.getvalue())

    st.success("Analysis complete.")

# Inject CSS that hides the elements matched by #MainMenu and footer.
# unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
_HIDE_DEFAULT_STYLE_CSS = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
st.markdown(_HIDE_DEFAULT_STYLE_CSS, unsafe_allow_html=True)