# Space72 / app.py — autocorrelation periodogram Streamlit app
# (author: QuantumLearner, commit 7ed431a)
import streamlit as st
import yfinance as yf
import numpy as np
import pandas as pd
from math import ceil
from datetime import datetime, timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pandas.tseries.offsets import BDay
# Must be the first Streamlit command: sets the browser tab title and a wide layout.
st.set_page_config(page_title="Autocorrelation Periodogram", layout="wide")
@st.cache_data(show_spinner=False)
def run_analysis(ticker, start_date, end_date, length, max_lag,
                 lags_per_plot, plot_start_lag, plot_end_lag, data_type):
    """Download daily data for *ticker* and build the autocorrelation figure.

    Parameters
    ----------
    ticker : str
        Symbol passed to yfinance.
    start_date, end_date : date-like
        Daily download range (forwarded to ``yf.download``).
    length : int
        Window size: smoothing period in "prices" mode, rolling-std window
        in "volatility" mode, and the correlation window in every mode.
    max_lag : int
        Largest lag (in bars) for which autocorrelation is computed.
    lags_per_plot : int
        Number of lag rows shown per heatmap panel.
    plot_start_lag, plot_end_lag : int
        Inclusive lag range actually visualized.
    data_type : str
        One of "prices", "returns" or "volatility".

    Returns
    -------
    tuple
        ``(results, None)`` on success, where ``results`` holds keys
        "df", "data_series", "corrs", "dates", "bucket_slices", "fig";
        ``(None, error_message)`` on failure.
    """
    df = yf.download(ticker, start=start_date, end=end_date,
                     interval="1d", auto_adjust=True)
    if df.empty:
        return None, "No data available for the given inputs."

    # Normalize column labels: newer yfinance returns a (field, ticker)
    # MultiIndex; otherwise strip any "_SUFFIX" from flat column names.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    else:
        df.columns = [c.split("_")[0] for c in df.columns]

    def ultimate_smoother(src, period):
        """Ehlers' UltimateSmoother: a low-lag two-pole recursive filter."""
        a1 = np.exp(-1.414 * np.pi / period)
        c2 = 2.0 * a1 * np.cos(1.414 * np.pi / period)
        c3 = -a1 * a1
        c1 = (1.0 + c2 - c3) / 4.0
        n = len(src)
        out = np.copy(src).astype(float)
        # The first three samples seed the recursion unchanged.
        for i in range(3, n):
            out[i] = ((1.0 - c1) * src[i]
                      + (2.0 * c1 - c2) * src[i-1]
                      - (c1 + c3) * src[i-2]
                      + c2 * out[i-1]
                      + c3 * out[i-2])
        return out

    # Build the series whose autocorrelation will be analyzed.
    if data_type == "prices":
        raw_series = df["Close"].values
        data_series = ultimate_smoother(raw_series, length)
    elif data_type == "returns":
        prices = df["Close"].values
        log_prices = np.log(prices)
        data_series = np.diff(log_prices, prepend=np.nan)
        data_series[0] = 0.0  # no prior bar exists for the first return
    elif data_type == "volatility":
        prices = df["Close"].values
        log_prices = np.log(prices)
        returns = np.diff(log_prices, prepend=np.nan)
        returns[0] = 0.0
        vol_series = pd.Series(returns).rolling(window=length).std().to_numpy()
        vol_series[:length-1] = 0.0  # warm-up region lacks a full window
        data_series = vol_series
    else:
        return None, "Invalid data type."

    def compute_autocorrelation(series, window_length, max_lag):
        """Rolling Pearson correlation of the current window against the
        window lagged by L bars, for each bar and each lag 0..max_lag.

        Returns an (n, max_lag+1) array; entries stay NaN wherever a full
        pair of windows is not available or a window is constant.
        """
        n = len(series)
        corrs = np.full((n, max_lag+1), np.nan, dtype=float)
        for i in range(window_length - 1, n):
            window = series[i - window_length + 1 : i + 1]
            # Sums for the unlagged window are invariant across lags,
            # so hoist them out of the inner loop.
            sum_x = np.sum(window)
            sum_xx = np.sum(window * window)
            for L in range(max_lag + 1):
                start_lag = i - window_length - L + 1
                end_lag = i - L + 1
                if start_lag < 0:
                    continue  # lagged window would run off the front
                window_lag = series[start_lag : end_lag]
                if len(window_lag) != window_length:
                    continue
                sum_y = np.sum(window_lag)
                sum_yy = np.sum(window_lag * window_lag)
                sum_xy = np.sum(window * window_lag)
                denom_x = window_length * sum_xx - sum_x * sum_x
                denom_y = window_length * sum_yy - sum_y * sum_y
                # Skip degenerate (constant) windows: zero variance would
                # divide by zero; those cells remain NaN.
                if denom_x > 0 and denom_y > 0:
                    numer = window_length * sum_xy - sum_x * sum_y
                    corrs[i, L] = numer / np.sqrt(denom_x * denom_y)
        return corrs

    corrs = compute_autocorrelation(data_series, length, max_lag)
    dates = df.index.to_pydatetime()

    def slice_corr(corr_matrix, lag_start, lag_end):
        """Extract lags [lag_start, lag_end], transposed to (lags, time)."""
        subset = corr_matrix[:, lag_start : lag_end + 1]
        return subset.T

    # Partition the visualized lag range into heatmap panels ("buckets").
    plot_range = plot_end_lag - plot_start_lag + 1
    n_plots = ceil(plot_range / lags_per_plot)
    bucket_slices = []
    for i in range(n_plots):
        ls = plot_start_lag + i * lags_per_plot
        le = min(plot_start_lag + (i+1) * lags_per_plot - 1, plot_end_lag)
        subset = slice_corr(corrs, ls, le)
        bucket_slices.append((ls, le, subset))

    # Green = positive correlation, red = negative, yellow = none.
    colorscale = [[0.0, 'red'], [0.5, 'yellow'], [1.0, 'green']]

    total_rows = 1 + len(bucket_slices)
    subplot_titles = [""]
    for (ls, le, _) in bucket_slices:
        # Separate the band bounds so the title reads "ACI 30-61" instead
        # of fusing them into one number ("ACI 3061").
        subplot_titles.append(f"ACI {ls}-{le}")
    fig = make_subplots(
        rows=total_rows, cols=1,
        shared_xaxes=True,
        row_heights=[2] + [1]*len(bucket_slices),
        vertical_spacing=0.03,
        subplot_titles=subplot_titles
    )

    # Top panel: the analyzed series itself.
    if data_type == "prices":
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=df["Close"],
                mode='lines',
                line=dict(width=1.2),
                name="Close Price"
            ),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=data_series,
                mode='lines',
                line=dict(width=1.2),
                name="Smoothed Price"
            ),
            row=1, col=1
        )
    elif data_type == "returns":
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=data_series,
                mode='lines',
                line=dict(width=1.2),
                name="Log Returns"
            ),
            row=1, col=1
        )
    elif data_type == "volatility":
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=data_series,
                mode='lines',
                line=dict(width=1.2),
                name="Rolling Volatility"
            ),
            row=1, col=1
        )

    # Heatmap panels: one per lag bucket; only the last carries the colorbar.
    for idx, (ls, le, subset) in enumerate(bucket_slices):
        row_index = idx + 2
        show_colorbar = (idx == len(bucket_slices) - 1)
        heatmap = go.Heatmap(
            x=dates,
            y=list(range(ls, le + 1)),
            z=subset,
            colorscale=colorscale,
            zmin=-1,
            zmax=1,
            showscale=show_colorbar,
            colorbar=dict(title="Correlation") if show_colorbar else None
        )
        fig.add_trace(heatmap, row=row_index, col=1)

    # Annotate each lag tick with the business date it points back to,
    # measured from the most recent bar.
    latest_date = pd.Timestamp(df.index[-1])
    for idx, (ls, le, _) in enumerate(bucket_slices):
        row_number = idx + 2
        tickvals = list(range(ls, le + 1))
        ticktext = [f"{lag} ({(latest_date - BDay(lag)).strftime('%Y-%m-%d')})"
                    for lag in tickvals]
        fig.update_yaxes(
            tickmode='array',
            tickvals=tickvals,
            ticktext=ticktext,
            row=row_number,
            tickfont=dict(size=8),
            col=1
        )

    fig.update_layout(
        template="plotly_dark",
        title=dict(text=f"Autocorrelation Indicator - {ticker} - {data_type.capitalize()}"),
        height=800 + 200 * len(bucket_slices),
        width=1600,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="center",
            x=0.5
        )
    )
    fig.update_xaxes(
        type="date",
        tickangle=45,
        tickformat="%Y-%m-%d"
    )

    return {"df": df,
            "data_series": data_series,
            "corrs": corrs,
            "dates": dates,
            "bucket_slices": bucket_slices,
            "fig": fig}, None
# Per-page results cache: {page name: (results dict or None, error message or None)}.
if "results" not in st.session_state:
    st.session_state.results = {}

# Page selector: which series type is analyzed and displayed.
current_page = st.sidebar.radio("Select Page",
                                options=["Prices", "Returns", "Volatility"],
                                help="Choose analysis type.")

st.sidebar.header("User Inputs")

with st.sidebar.expander("Data Inputs", expanded=True):
    ticker = st.text_input("Ticker", value="SPY", help="Enter the ticker symbol.")
    start_date = st.date_input("Start Date", value=datetime(2020, 1, 1),
                               help="Set the start date for daily data.")
    # Default end date is tomorrow so the latest completed bar is included.
    default_end_date = datetime.today() + timedelta(days=1)
    end_date = st.date_input("End Date", value=default_end_date,
                             help="Set the end date for daily data.")

with st.sidebar.expander("Methodology Parameters", expanded=True):
    length = st.number_input(
        "Window Size", value=20, min_value=1,
        help="Controls how many days are used when comparing current vs past segments. Also used for smoothing (Prices) and rolling window in volatility."
    )
    lags_per_plot = st.number_input(
        "Lags per Plot", value=32, min_value=1,
        help="How many lag rows to include in each heatmap panel."
    )
    plot_start_lag = st.number_input(
        "Plot Start Lag", value=30, min_value=0,
        help="Lower bound of lag range to visualize. Set this to skip very short lags."
    )
    plot_end_lag = st.number_input(
        "Plot End Lag", value=120, min_value=0,
        help="Upper bound of lag range to visualize. The tool will measure similarity with up to this many days in the past."
    )

# Autocorrelation is always computed up to the largest visualized lag.
max_lag = plot_end_lag

# Run Analysis button: persist inputs and (re)compute for the current page.
if st.session_state if False else st.sidebar.button("Run Analysis"):
    # Store the inputs so the results section can describe the cached run
    # on later reruns, even if the widget values change afterwards.
    st.session_state.ticker = ticker
    st.session_state.start_date = start_date
    st.session_state.end_date = end_date
    st.session_state.length = length
    st.session_state.max_lag = max_lag
    st.session_state.lags_per_plot = lags_per_plot
    st.session_state.plot_start_lag = plot_start_lag
    st.session_state.plot_end_lag = plot_end_lag
    st.session_state.page = current_page
    with st.spinner("Running analysis..."):
        results, error = run_analysis(
            ticker,
            start_date,
            end_date,
            length,
            max_lag,
            lags_per_plot,
            plot_start_lag,
            plot_end_lag,
            current_page.lower()
        )
    st.session_state.results[current_page] = (results, error)
# Always show the main title and intro, regardless of whether an analysis ran.
st.title("Autocorrelation Periodogram")
st.markdown(
    "This tool visualizes how market structure repeats across time by computing rolling autocorrelations over many lags.\n\n"
    "You can analyze **Prices**, **Returns**, or **Volatility**. The heatmaps show how much today’s behavior resembles the past at different time horizons."
)

# Collapsible methodology section with the autocorrelation math.
with st.expander("Methodology", expanded=False):
    st.markdown("""
**Purpose**
Measure how similar the current behavior is to past behavior over multiple lags to detect persistence or reversion in structure.
**Autocorrelation formula**:
""")
    st.latex(r"""
\rho_{t, L} = \frac{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})(x_{t-L-i} - \bar{y})}
{\sqrt{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})^2} \cdot
\sqrt{\sum_{i=0}^{N-1}(x_{t-L-i} - \bar{y})^2}}
""")
    st.markdown("""
- \( x \): current window
- \( y \): lagged window shifted by \( L \) days
- \( N \): window size (set via **Window Size**)
- \( L \): lag (from 0 to **Max Lag**)
**Inputs** (configured in sidebar):
- **Window Size**: used for autocorrelation and volatility. Also used for smoothing in *Prices* mode.
- **Max Lag**: upper bound on lag values to compute.
- **Lags per Plot**: number of lag rows per heatmap.
- **Plot Start / End Lag**: limits for lags to visualize.
**Output**
The app displays:
- A top panel with the selected series.
- One or more heatmaps below showing autocorrelation across lag ranges.
- Color scale: green = positive correlation (momentum), red = negative correlation (mean reversion), yellow = no structure.
""")
# Show analysis results (if any) cached for the currently selected page.
if current_page in st.session_state.results:
    results, error = st.session_state.results[current_page]
    st.markdown(f"### {current_page} Analysis")
    if error:
        st.error(error)
    else:
        # Re-derive the panel count from the persisted inputs so the
        # description matches the cached figure, not the live widgets.
        lag_start = st.session_state.plot_start_lag
        lag_end = st.session_state.plot_end_lag
        lags_per_plot = st.session_state.lags_per_plot
        n_panels = ceil((lag_end - lag_start + 1) / lags_per_plot)
        if current_page.lower() == "prices":
            st.markdown(f"""
**Input type**: Closing prices (smoothed with Ehlers' filter)
**Top panel**: Raw close vs smoothed price
**Lower panels**: Autocorrelation of smoothed prices across {n_panels} lag bands
**Lag range**: {lag_start} to {lag_end}
**Window size**: {st.session_state.length}
""")
        elif current_page.lower() == "returns":
            st.markdown(f"""
**Input type**: Log returns
**Top panel**: Daily log returns
**Lower panels**: Autocorrelation of returns across {n_panels} lag bands
**Lag range**: {lag_start} to {lag_end}
**Window size**: {st.session_state.length}
""")
        elif current_page.lower() == "volatility":
            st.markdown(f"""
**Input type**: Rolling standard deviation of log returns
**Top panel**: Rolling volatility
**Lower panels**: Autocorrelation of volatility across {n_panels} lag bands
**Lag range**: {lag_start} to {lag_end}
**Window size**: {st.session_state.length}
""")
        st.plotly_chart(results["fig"], use_container_width=True)
else:
    # Nothing cached for this page yet: prompt the user to run an analysis.
    st.info("Use the sidebar to set parameters and click **Run Analysis** to display results here.")
# Hide Streamlit's default chrome (hamburger menu and footer) via injected CSS.
st.markdown(
    """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
""",
    unsafe_allow_html=True
)