Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import yfinance as yf | |
| import numpy as np | |
| import pandas as pd | |
| from math import ceil | |
| from datetime import datetime, timedelta | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| from pandas.tseries.offsets import BDay | |
# Page-level configuration: browser tab title and a full-width layout.
st.set_page_config(
    page_title="Autocorrelation Periodogram",
    layout="wide",
)
# Series kinds accepted by run_analysis, mapped to the legend label of the
# series drawn in the top panel.
_SERIES_LABELS = {
    "prices": "Smoothed Price",
    "returns": "Log Returns",
    "volatility": "Rolling Volatility",
}

_NO_DATA_MESSAGE = "No data available for the given inputs."


def _ultimate_smoother(src, period):
    """Smooth ``src`` with a second-order recursive filter (Ehlers' smoother).

    The first three samples are passed through unchanged; every later output
    combines the three most recent inputs with the two previous outputs.
    """
    src = np.asarray(src, dtype=float)
    a1 = np.exp(-1.414 * np.pi / period)
    c2 = 2.0 * a1 * np.cos(1.414 * np.pi / period)
    c3 = -a1 * a1
    c1 = (1.0 + c2 - c3) / 4.0
    out = src.copy()
    for i in range(3, len(src)):
        out[i] = ((1.0 - c1) * src[i]
                  + (2.0 * c1 - c2) * src[i - 1]
                  - (c1 + c3) * src[i - 2]
                  + c2 * out[i - 1]
                  + c3 * out[i - 2])
    return out


def _build_series(close, length, data_type):
    """Turn closing prices into the series that gets autocorrelated."""
    if data_type == "prices":
        return _ultimate_smoother(close, length)

    log_returns = np.diff(np.log(close), prepend=np.nan)
    log_returns[0] = 0.0  # no previous close for the first bar
    if data_type == "returns":
        return log_returns

    # Volatility: rolling standard deviation of the log returns. Ask for a
    # copy explicitly because the array is modified in place below and
    # to_numpy() may otherwise hand back a read-only view.
    vol = (pd.Series(log_returns)
           .rolling(window=length)
           .std()
           .to_numpy(dtype=float, copy=True))
    vol[:length - 1] = 0.0  # warm-up bars have no full window yet
    return vol


def _compute_autocorrelation(series, window_length, max_lag):
    """Rolling Pearson correlation between each window and its lagged copy.

    Returns an array of shape ``(len(series), max_lag + 1)``. Entry ``[i, L]``
    is the correlation between the ``window_length`` samples ending at ``i``
    and the ``window_length`` samples ending at ``i - L``. It is NaN when
    either window is not fully available or has no variance.
    """
    series = np.asarray(series, dtype=float)
    n = len(series)
    corrs = np.full((n, max_lag + 1), np.nan, dtype=float)
    if n < window_length:
        return corrs

    # windows[j] is series[j : j + window_length], i.e. the window that ends
    # at index j + window_length - 1.
    windows = np.lib.stride_tricks.sliding_window_view(series, window_length)
    n_windows = len(windows)
    sums = windows.sum(axis=1)
    sq_sums = (windows * windows).sum(axis=1)
    spread = window_length * sq_sums - sums * sums

    for lag in range(min(max_lag, n_windows - 1) + 1):
        current = slice(lag, n_windows)
        lagged = slice(0, n_windows - lag)
        sum_xy = (windows[current] * windows[lagged]).sum(axis=1)
        numer = window_length * sum_xy - sums[current] * sums[lagged]
        denom_x = spread[current]
        denom_y = spread[lagged]
        valid = (denom_x > 0) & (denom_y > 0)
        column = np.full(n_windows - lag, np.nan, dtype=float)
        column[valid] = numer[valid] / np.sqrt(denom_x[valid] * denom_y[valid])
        corrs[window_length - 1 + lag:, lag] = column
    return corrs


def _build_figure(df, dates, data_series, bucket_slices, ticker, data_type):
    """Assemble the series panel plus one heatmap row per lag bucket."""
    n_rows = 1 + len(bucket_slices)
    # make_subplots rejects a spacing larger than 1 / (rows - 1), so shrink
    # the default spacing when many heatmap rows are requested.
    spacing = min(0.03, 0.5 / max(n_rows - 1, 1))
    fig = make_subplots(
        rows=n_rows, cols=1,
        shared_xaxes=True,
        row_heights=[2] + [1] * len(bucket_slices),
        vertical_spacing=spacing,
        subplot_titles=[""] + [f"ACI {ls}–{le}" for ls, le, _ in bucket_slices],
    )

    # Top panel: the analysed series (plus the raw close in "prices" mode).
    if data_type == "prices":
        fig.add_trace(
            go.Scatter(x=dates, y=df["Close"], mode='lines',
                       line=dict(width=1.2), name="Close Price"),
            row=1, col=1,
        )
    fig.add_trace(
        go.Scatter(x=dates, y=data_series, mode='lines',
                   line=dict(width=1.2), name=_SERIES_LABELS[data_type]),
        row=1, col=1,
    )

    colorscale = [[0.0, 'red'], [0.5, 'yellow'], [1.0, 'green']]
    latest_date = pd.Timestamp(df.index[-1])
    last_bucket = len(bucket_slices) - 1
    for idx, (ls, le, subset) in enumerate(bucket_slices):
        row = idx + 2
        lags = list(range(ls, le + 1))
        # All heatmaps share one scale, so only the last one shows a colorbar.
        show_colorbar = idx == last_bucket
        fig.add_trace(
            go.Heatmap(
                x=dates,
                y=lags,
                z=subset,
                colorscale=colorscale,
                zmin=-1,
                zmax=1,
                showscale=show_colorbar,
                colorbar=dict(title="Correlation") if show_colorbar else None,
            ),
            row=row, col=1,
        )
        # Label every lag with the business date it maps to, counted back
        # from the most recent bar.
        fig.update_yaxes(
            tickmode='array',
            tickvals=lags,
            ticktext=[
                f"{lag} ({(latest_date - BDay(lag)).strftime('%Y-%m-%d')})"
                for lag in lags
            ],
            tickfont=dict(size=8),
            row=row, col=1,
        )

    fig.update_layout(
        template="plotly_dark",
        title=dict(text=f"Autocorrelation Indicator - {ticker} - {data_type.capitalize()}"),
        height=800 + 200 * len(bucket_slices),
        width=1600,
        legend=dict(orientation="h", yanchor="bottom", y=1.05,
                    xanchor="center", x=0.5),
    )
    fig.update_xaxes(type="date", tickangle=45, tickformat="%Y-%m-%d")
    return fig


def run_analysis(ticker, start_date, end_date, length, max_lag,
                 lags_per_plot, plot_start_lag, plot_end_lag, data_type):
    """Download daily data and build the autocorrelation heatmap figure.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date, end_date: Date range passed to ``yf.download``.
        length: Window size for the correlation, the price smoother and the
            rolling volatility.
        max_lag: Largest lag to compute; raised to ``plot_end_lag`` when
            smaller so every plotted lag has data.
        lags_per_plot: Number of lag rows per heatmap panel.
        plot_start_lag, plot_end_lag: Inclusive lag range to visualise.
        data_type: One of ``"prices"``, ``"returns"`` or ``"volatility"``.

    Returns:
        ``(results, None)`` on success, where ``results`` is a dict with the
        keys ``df``, ``data_series``, ``corrs``, ``dates``, ``bucket_slices``
        and ``fig``; or ``(None, message)`` when the inputs are invalid or no
        data is available.
    """
    # Validate before downloading so bad parameters fail fast and never cost
    # a network round trip.
    if data_type not in _SERIES_LABELS:
        return None, "Invalid data type."
    if length < 2:
        return None, "Window Size must be at least 2 to compute a correlation."
    if lags_per_plot < 1:
        return None, "Lags per Plot must be at least 1."
    if plot_start_lag < 0 or plot_end_lag < plot_start_lag:
        return None, ("Plot End Lag must be greater than or equal to "
                      "Plot Start Lag, and both must be non-negative.")

    df = yf.download(ticker, start=start_date, end=end_date,
                     interval="1d", auto_adjust=True)
    if df is None or df.empty:
        return None, _NO_DATA_MESSAGE

    # Reduce the column labels to plain field names ("Close", "Open", ...).
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    else:
        df.columns = [c.split("_")[0] for c in df.columns]
    if "Close" not in df.columns:
        return None, _NO_DATA_MESSAGE

    # A single NaN close would propagate through the recursive smoother to
    # every later sample, so drop such rows up front.
    df = df.dropna(subset=["Close"])
    if df.empty:
        return None, _NO_DATA_MESSAGE

    close = df["Close"].to_numpy(dtype=float)
    data_series = _build_series(close, length, data_type)

    # Never compute fewer lags than are going to be plotted.
    corrs = _compute_autocorrelation(data_series, length,
                                     max(max_lag, plot_end_lag))
    dates = df.index.to_pydatetime()

    # Split the plotted lag range into consecutive buckets, one per heatmap.
    bucket_slices = []
    for ls in range(plot_start_lag, plot_end_lag + 1, lags_per_plot):
        le = min(ls + lags_per_plot - 1, plot_end_lag)
        bucket_slices.append((ls, le, corrs[:, ls:le + 1].T))

    fig = _build_figure(df, dates, data_series, bucket_slices,
                        ticker, data_type)
    return {"df": df,
            "data_series": data_series,
            "corrs": corrs,
            "dates": dates,
            "bucket_slices": bucket_slices,
            "fig": fig}, None
# Create the results store on the first run of the session; it is keyed by
# page name further down.
if "results" not in st.session_state:
    st.session_state["results"] = {}
# Page selector at the top of the sidebar.
current_page = st.sidebar.radio(
    "Select Page",
    options=["Prices", "Returns", "Volatility"],
    help="Choose analysis type.",
)

st.sidebar.header("User Inputs")

# --- Data inputs: what to download -----------------------------------------
data_inputs_box = st.sidebar.expander("Data Inputs", expanded=True)
ticker = data_inputs_box.text_input(
    "Ticker", value="SPY", help="Enter the ticker symbol."
)
start_date = data_inputs_box.date_input(
    "Start Date",
    value=datetime(2020, 1, 1),
    help="Set the start date for daily data.",
)
# The default end date is one day after today.
default_end_date = datetime.today() + timedelta(days=1)
end_date = data_inputs_box.date_input(
    "End Date",
    value=default_end_date,
    help="Set the end date for daily data.",
)

# --- Methodology parameters: how to analyse and plot -----------------------
methodology_box = st.sidebar.expander("Methodology Parameters", expanded=True)
length = methodology_box.number_input(
    "Window Size",
    value=20,
    min_value=1,
    help="Controls how many days are used when comparing current vs past segments. Also used for smoothing (Prices) and rolling window in volatility.",
)
lags_per_plot = methodology_box.number_input(
    "Lags per Plot",
    value=32,
    min_value=1,
    help="How many lag rows to include in each heatmap panel.",
)
plot_start_lag = methodology_box.number_input(
    "Plot Start Lag",
    value=30,
    min_value=0,
    help="Lower bound of lag range to visualize. Set this to skip very short lags.",
)
plot_end_lag = methodology_box.number_input(
    "Plot End Lag",
    value=120,
    min_value=0,
    help="Upper bound of lag range to visualize. The tool will measure similarity with up to this many days in the past.",
)

# The largest lag computed is the largest lag plotted.
max_lag = plot_end_lag
# Run Analysis button: snapshot the inputs, run the analysis, cache the outcome.
if st.sidebar.button("Run Analysis"):
    # Keep the inputs of this run in session state so they survive reruns.
    run_inputs = {
        "ticker": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "length": length,
        "max_lag": max_lag,
        "lags_per_plot": lags_per_plot,
        "plot_start_lag": plot_start_lag,
        "plot_end_lag": plot_end_lag,
        "page": current_page,
    }
    for state_key, state_value in run_inputs.items():
        st.session_state[state_key] = state_value

    with st.spinner("Running analysis..."):
        results, error = run_analysis(
            ticker=ticker,
            start_date=start_date,
            end_date=end_date,
            length=length,
            max_lag=max_lag,
            lags_per_plot=lags_per_plot,
            plot_start_lag=plot_start_lag,
            plot_end_lag=plot_end_lag,
            data_type=current_page.lower(),
        )

    # Cache the (results, error) pair under the page it was produced for.
    st.session_state.results[current_page] = (results, error)
# Always show the main title and intro.
st.title("Autocorrelation Periodogram")
st.markdown(
    "This tool visualizes how market structure repeats across time by computing rolling autocorrelations over many lags.\n\n"
    "You can analyze **Prices**, **Returns**, or **Volatility**. The heatmaps show how much today’s behavior resembles the past at different time horizons."
)

# Methodology expander with the formula and a description of inputs/outputs.
# Paragraphs are separated by blank lines so headings do not get absorbed into
# the preceding list, and inline math uses $...$ so Streamlit renders it.
with st.expander("Methodology", expanded=False):
    st.markdown(
        "\n".join([
            "**Purpose**",
            "",
            "Measure how similar the current behavior is to past behavior over multiple lags to detect persistence or reversion in structure.",
            "",
            "**Autocorrelation formula**:",
        ])
    )
    st.latex(r"""
\rho_{t, L} = \frac{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})(x_{t-L-i} - \bar{y})}
{\sqrt{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})^2} \cdot
\sqrt{\sum_{i=0}^{N-1}(x_{t-L-i} - \bar{y})^2}}
""")
    st.markdown(
        "\n".join([
            r"- $x$: current window, with mean $\bar{x}$",
            r"- $y$: lagged window shifted by $L$ days, with mean $\bar{y}$",
            r"- $N$: window size (set via **Window Size**)",
            r"- $L$: lag (from 0 to **Plot End Lag**)",
            "",
            "**Inputs** (configured in sidebar):",
            "",
            "- **Window Size**: used for autocorrelation and volatility. Also used for smoothing in *Prices* mode.",
            "- **Lags per Plot**: number of lag rows per heatmap.",
            "- **Plot Start / End Lag**: limits for lags to visualize. The end lag is also the upper bound on lag values to compute.",
            "",
            "**Output**",
            "",
            "The app displays:",
            "",
            "- A top panel with the selected series.",
            "- One or more heatmaps below showing autocorrelation across lag ranges.",
            "- Color scale: green = positive correlation (momentum), red = negative correlation (mean reversion), yellow = no structure.",
        ])
    )
# Show the cached analysis for the selected page, if one has been run.
if current_page in st.session_state.results:
    results, error = st.session_state.results[current_page]
    st.markdown(f"### {current_page} Analysis")
    if error:
        st.error(error)
    else:
        # Describe the figure from the buckets stored with THIS page's result.
        # The values in session state belong to the most recent run, which may
        # have been for a different page with different lag settings.
        bucket_slices = results["bucket_slices"]
        n_panels = len(bucket_slices)
        if bucket_slices:
            lag_start = bucket_slices[0][0]
            lag_end = bucket_slices[-1][1]
        else:
            lag_start = st.session_state.plot_start_lag
            lag_end = st.session_state.plot_end_lag
        # NOTE: the window size is not part of the stored result, so it is
        # still read from the most recent run's session state.
        window_size = st.session_state.length

        # Per page: (input description, top-panel description, what the
        # heatmaps are the autocorrelation of).
        page_text = {
            "prices": ("Closing prices (smoothed with Ehlers' filter)",
                       "Raw close vs smoothed price",
                       "smoothed prices"),
            "returns": ("Log returns",
                        "Daily log returns",
                        "returns"),
            "volatility": ("Rolling standard deviation of log returns",
                           "Rolling volatility",
                           "volatility"),
        }
        page_key = current_page.lower()
        if page_key in page_text:
            input_type, top_panel, subject = page_text[page_key]
            summary_lines = [
                f"**Input type**: {input_type}",
                f"**Top panel**: {top_panel}",
                f"**Lower panels**: Autocorrelation of {subject} across {n_panels} lag bands",
                f"**Lag range**: {lag_start} to {lag_end}",
                f"**Window size**: {window_size}",
            ]
            # Two trailing spaces + newline is a Markdown hard line break;
            # without it the five lines render as one run-on paragraph.
            st.markdown("  \n".join(summary_lines))
        st.plotly_chart(results["fig"], use_container_width=True)
else:
    st.info("Use the sidebar to set parameters and click **Run Analysis** to display results here.")
# Inject CSS that hides Streamlit's built-in main menu and footer.
hide_default_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_default_style, unsafe_allow_html=True)