File size: 13,314 Bytes
7ed431a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
import streamlit as st
import yfinance as yf
import numpy as np
import pandas as pd
from math import ceil
from datetime import datetime, timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pandas.tseries.offsets import BDay

# Page-level settings (browser tab title and wide layout).
# NOTE: per Streamlit's documentation this call has to precede the other
# Streamlit commands that draw the page, so keep it at the top of the script.
st.set_page_config(
    page_title="Autocorrelation Periodogram",
    layout="wide",
)

@st.cache_data(show_spinner=False)
def run_analysis(ticker, start_date, end_date, length, max_lag,
                 lags_per_plot, plot_start_lag, plot_end_lag, data_type):
    """Download daily prices and build the rolling-autocorrelation figure.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the download range.
        end_date: End of the download range.
        length: Window size of the correlation windows; also used as the
            smoother period ("prices") and the rolling-std window
            ("volatility").
        max_lag: Largest lag for which correlations are computed.
        lags_per_plot: Number of lag rows shown in each heatmap panel.
        plot_start_lag: First lag shown in the heatmaps.
        plot_end_lag: Last lag shown in the heatmaps (inclusive).
        data_type: One of "prices", "returns" or "volatility".

    Returns:
        ``(results, None)`` on success, where ``results`` is a dict with the
        keys "df", "data_series", "corrs", "dates", "bucket_slices" and
        "fig"; ``(None, message)`` when the inputs are invalid or the
        download returned no rows.
    """
    # Validate up front so bad inputs never trigger a download and never
    # produce a figure whose heatmap rows and labels disagree.
    if data_type not in ("prices", "returns", "volatility"):
        return None, "Invalid data type."
    if length < 1:
        return None, "Window Size must be at least 1."
    if lags_per_plot < 1:
        return None, "Lags per Plot must be at least 1."
    if plot_start_lag < 0 or plot_start_lag > plot_end_lag:
        return None, "Plot Start Lag must be between 0 and Plot End Lag."
    if plot_end_lag > max_lag:
        return None, "Plot End Lag cannot exceed Max Lag."

    df = yf.download(ticker, start=start_date, end=end_date,
                     interval="1d", auto_adjust=True)
    if df.empty:
        return None, "No data available for the given inputs."

    # Flatten the column labels so that df["Close"] works in both layouts.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    else:
        df.columns = [c.split("_")[0] for c in df.columns]

    def ultimate_smoother(src, period):
        """Recursive smoothing filter (the UI calls it Ehlers' filter).

        The first three samples are copied from ``src`` unchanged; every
        later sample mixes the last three inputs with the last two outputs.
        """
        a1 = np.exp(-1.414 * np.pi / period)
        c2 = 2.0 * a1 * np.cos(1.414 * np.pi / period)
        c3 = -a1 * a1
        c1 = (1.0 + c2 - c3) / 4.0
        out = np.copy(src).astype(float)
        for i in range(3, len(src)):
            out[i] = ((1.0 - c1) * src[i]
                      + (2.0 * c1 - c2) * src[i-1]
                      - (c1 + c3) * src[i-2]
                      + c2 * out[i-1]
                      + c3 * out[i-2])
        return out

    closes = df["Close"].values
    if data_type == "prices":
        data_series = ultimate_smoother(closes, length)
    else:
        # Daily log returns; the first element has no predecessor, so it is
        # set to 0.0 instead of NaN.
        log_returns = np.diff(np.log(closes), prepend=np.nan)
        log_returns[0] = 0.0
        if data_type == "returns":
            data_series = log_returns
        else:
            vol_series = (pd.Series(log_returns)
                          .rolling(window=length).std().to_numpy())
            # Positions without a full rolling window are zeroed, not NaN.
            vol_series[:length-1] = 0.0
            data_series = vol_series

    def compute_autocorrelation(series, window_length, max_lag):
        """Rolling Pearson correlation of each window with its lagged copy.

        ``corrs[i, lag]`` correlates ``series[i-window_length+1 : i+1]`` with
        the same-length window ending ``lag`` samples earlier. Entries stay
        NaN when there is not enough history or either window has no
        positive variance term.
        """
        n = len(series)
        corrs = np.full((n, max_lag + 1), np.nan, dtype=float)
        for i in range(window_length - 1, n):
            window = series[i - window_length + 1 : i + 1]
            sum_x = np.sum(window)
            sum_xx = np.sum(window * window)
            # The current window's variance term does not depend on the lag.
            denom_x = window_length * sum_xx - sum_x * sum_x
            if not denom_x > 0:
                continue
            for lag in range(max_lag + 1):
                start_lag = i - window_length - lag + 1
                if start_lag < 0:
                    # Larger lags reach even further back; none can fit.
                    break
                # start_lag >= 0 and the end index <= i + 1 <= n, so this
                # slice always holds exactly window_length samples.
                window_lag = series[start_lag : i - lag + 1]
                sum_y = np.sum(window_lag)
                sum_yy = np.sum(window_lag * window_lag)
                sum_xy = np.sum(window * window_lag)
                denom_y = window_length * sum_yy - sum_y * sum_y
                if denom_y > 0:
                    numer = window_length * sum_xy - sum_x * sum_y
                    corrs[i, lag] = numer / np.sqrt(denom_x * denom_y)
        return corrs

    corrs = compute_autocorrelation(data_series, length, max_lag)
    dates = df.index.to_pydatetime()

    # Split the plotted lag range into consecutive bands of at most
    # lags_per_plot lags; each band becomes one heatmap (lags on the y-axis).
    plot_range = plot_end_lag - plot_start_lag + 1
    n_plots = ceil(plot_range / lags_per_plot)
    bucket_slices = []
    for i in range(n_plots):
        ls = plot_start_lag + i * lags_per_plot
        le = min(ls + lags_per_plot - 1, plot_end_lag)
        bucket_slices.append((ls, le, corrs[:, ls : le + 1].T))

    colorscale = [[0.0, 'red'], [0.5, 'yellow'], [1.0, 'green']]
    # The first (empty) title belongs to the top series panel.
    subplot_titles = [""] + [f"ACI {ls}-{le}" for (ls, le, _) in bucket_slices]

    fig = make_subplots(
        rows=1 + len(bucket_slices), cols=1,
        shared_xaxes=True,
        row_heights=[2] + [1]*len(bucket_slices),
        vertical_spacing=0.03,
        subplot_titles=subplot_titles
    )

    # Top panel: the analysed series (plus the raw close in "prices" mode).
    if data_type == "prices":
        top_traces = [("Close Price", df["Close"]),
                      ("Smoothed Price", data_series)]
    elif data_type == "returns":
        top_traces = [("Log Returns", data_series)]
    else:
        top_traces = [("Rolling Volatility", data_series)]
    for trace_name, values in top_traces:
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=values,
                mode='lines',
                line=dict(width=1.2),
                name=trace_name
            ),
            row=1, col=1
        )

    latest_date = pd.Timestamp(df.index[-1])
    for idx, (ls, le, subset) in enumerate(bucket_slices):
        row_index = idx + 2
        lags = list(range(ls, le + 1))
        # Only the last heatmap carries the shared colour bar.
        show_colorbar = (idx == len(bucket_slices) - 1)
        fig.add_trace(
            go.Heatmap(
                x=dates,
                y=lags,
                z=subset,
                colorscale=colorscale,
                zmin=-1,
                zmax=1,
                showscale=show_colorbar,
                colorbar=dict(title="Correlation") if show_colorbar else None
            ),
            row=row_index, col=1
        )
        # Label each lag with the business date that many days before the
        # last row of the downloaded data.
        ticktext = [f"{lag} ({(latest_date - BDay(lag)).strftime('%Y-%m-%d')})"
                    for lag in lags]
        fig.update_yaxes(
            tickmode='array',
            tickvals=lags,
            ticktext=ticktext,
            tickfont=dict(size=8),
            row=row_index,
            col=1
        )

    fig.update_layout(
        template="plotly_dark",
        title=dict(text=f"Autocorrelation Indicator - {ticker} - {data_type.capitalize()}"),
        height=800 + 200 * len(bucket_slices),
        width=1600,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="center",
            x=0.5
        )
    )
    fig.update_xaxes(
        type="date",
        tickangle=45,
        tickformat="%Y-%m-%d"
    )

    return {"df": df,
            "data_series": data_series,
            "corrs": corrs,
            "dates": dates,
            "bucket_slices": bucket_slices,
            "fig": fig}, None

# Initialize session state for results (one (results, error) entry per page).
if "results" not in st.session_state:
    st.session_state.results = {}

# Top radio for page selection.
current_page = st.sidebar.radio("Select Page",
                                options=["Prices", "Returns", "Volatility"],
                                help="Choose analysis type.")

st.sidebar.header("User Inputs")

with st.sidebar.expander("Data Inputs", expanded=True):
    ticker = st.text_input("Ticker", value="SPY", help="Enter the ticker symbol.")
    start_date = st.date_input("Start Date", value=datetime(2020, 1, 1),
                               help="Set the start date for daily data.")
    # Default the end date to tomorrow.
    default_end_date = datetime.today() + timedelta(days=1)
    end_date = st.date_input("End Date", value=default_end_date,
                             help="Set the end date for daily data.")

with st.sidebar.expander("Methodology Parameters", expanded=True):
    length = st.number_input(
        "Window Size", value=20, min_value=1,
        help="Controls how many days are used when comparing current vs past segments. Also used for smoothing (Prices) and rolling window in volatility."
    )

    lags_per_plot = st.number_input(
        "Lags per Plot", value=32, min_value=1,
        help="How many lag rows to include in each heatmap panel."
    )
    plot_start_lag = st.number_input(
        "Plot Start Lag", value=30, min_value=0,
        help="Lower bound of lag range to visualize. Set this to skip very short lags."
    )
    plot_end_lag = st.number_input(
        "Plot End Lag", value=120, min_value=0,
        help="Upper bound of lag range to visualize. The tool will measure similarity with up to this many days in the past."
    )

# There is no separate "max lag" input: correlations are computed up to the
# largest lag that will be plotted.
max_lag = plot_end_lag


# Run Analysis button.
if st.sidebar.button("Run Analysis"):
    # Surrounding whitespace is never part of a symbol.
    ticker = ticker.strip()

    # Remember the parameters of the most recent run.
    st.session_state.ticker = ticker
    st.session_state.start_date = start_date
    st.session_state.end_date = end_date
    st.session_state.length = length
    st.session_state.max_lag = max_lag
    st.session_state.lags_per_plot = lags_per_plot
    st.session_state.plot_start_lag = plot_start_lag
    st.session_state.plot_end_lag = plot_end_lag
    st.session_state.page = current_page

    # Reject inputs that cannot produce heatmaps before doing any work; the
    # message is shown through the same per-page error path as other errors.
    if not ticker:
        results, error = None, "Please enter a ticker symbol."
    elif plot_start_lag > plot_end_lag:
        results, error = None, "Plot Start Lag must not be greater than Plot End Lag."
    else:
        with st.spinner("Running analysis..."):
            results, error = run_analysis(
                ticker,
                start_date,
                end_date,
                length,
                max_lag,
                lags_per_plot,
                plot_start_lag,
                plot_end_lag,
                current_page.lower()
            )
    st.session_state.results[current_page] = (results, error)

# Always show the main title and intro
st.title("Autocorrelation Periodogram")
st.markdown(
    "This tool visualizes how market structure repeats across time by computing rolling autocorrelations over many lags.\n\n"
    "You can analyze **Prices**, **Returns**, or **Volatility**. The heatmaps show how much today’s behavior resembles the past at different time horizons."
)

# Methodology expander with math.
# Raw strings keep the LaTeX backslashes intact, and inline math uses $...$,
# which is the delimiter st.markdown renders as LaTeX.
with st.expander("Methodology", expanded=False):
    st.markdown("""
**Purpose**

Measure how similar the current behavior is to past behavior over multiple lags to detect persistence or reversion in structure.

**Autocorrelation formula**:
""")
    st.latex(r"""
\rho_{t, L} = \frac{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})(x_{t-L-i} - \bar{y})}
                   {\sqrt{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})^2} \cdot 
                          \sqrt{\sum_{i=0}^{N-1}(x_{t-L-i} - \bar{y})^2}}
""")
    st.markdown(r"""
- $x_{t-i}$: values of the current window, with mean $\bar{x}$
- $x_{t-L-i}$: values of the lagged window shifted by $L$ days, with mean $\bar{y}$
- $N$: window size (set via **Window Size**)
- $L$: lag (from 0 to **Plot End Lag**)

**Inputs** (configured in sidebar):
- **Window Size**: used for autocorrelation and volatility. Also used for smoothing in *Prices* mode.
- **Lags per Plot**: number of lag rows per heatmap.
- **Plot Start / End Lag**: limits for lags to visualize. **Plot End Lag** is also the upper bound on lag values to compute.

**Output**

The app displays:
- A top panel with the selected series.
- One or more heatmaps below showing autocorrelation across lag ranges.
- Color scale: green = positive correlation (momentum), red = negative correlation (mean reversion), yellow = no structure.
""")

# Show analysis results (if any)
if current_page in st.session_state.results:
    results, error = st.session_state.results[current_page]
    st.markdown(f"### {current_page} Analysis")

    if error:
        st.error(error)
    else:
        # Describe the run that produced THIS page's figure. The lag range
        # and panel count come from the stored result itself, because the
        # session-state parameters reflect the most recent run of any page.
        bucket_slices = results["bucket_slices"]
        if bucket_slices:
            lag_start = bucket_slices[0][0]
            lag_end = bucket_slices[-1][1]
            n_panels = len(bucket_slices)
        else:
            # No heatmap was drawn; report the requested range as entered.
            lag_start = st.session_state.plot_start_lag
            lag_end = st.session_state.plot_end_lag
            n_panels = 0

        # Per-page wording: (input type, top panel, series named in the
        # "Lower panels" line).
        page_descriptions = {
            "prices": ("Closing prices (smoothed with Ehlers' filter)",
                       "Raw close vs smoothed price",
                       "smoothed prices"),
            "returns": ("Log returns",
                        "Daily log returns",
                        "returns"),
            "volatility": ("Rolling standard deviation of log returns",
                           "Rolling volatility",
                           "volatility"),
        }
        description = page_descriptions.get(current_page.lower())
        if description is not None:
            input_type, top_panel, series_label = description
            # NOTE: the window size is not stored with the results, so it is
            # still read from the most recent run's session state.
            summary_lines = [
                f"**Input type**: {input_type}",
                f"**Top panel**: {top_panel}",
                f"**Lower panels**: Autocorrelation of {series_label} across {n_panels} lag bands",
                f"**Lag range**: {lag_start} to {lag_end}",
                f"**Window size**: {st.session_state.length}",
            ]
            # Two trailing spaces before a newline make a Markdown line break.
            st.markdown("  \n".join(summary_lines))

        st.plotly_chart(results["fig"], use_container_width=True)

else:
    st.info("Use the sidebar to set parameters and click **Run Analysis** to display results here.")

# Inject CSS that hides the elements matched by `#MainMenu` and `footer`
# (Streamlit's default menu and footer).
hide_streamlit_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
# unsafe_allow_html is needed so the <style> tag is emitted as HTML.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)