Spaces:

QuantumLearner
/

Space72

Sleeping

App Files Files Community

QuantumLearner committed on Apr 7, 2025

Commit

7ed431a

verified ·

1 Parent(s): 669e2e9

Create app.py

Browse files

Files changed (1) hide show

app.py +377 -0

app.py ADDED Viewed

	@@ -0,0 +1,377 @@

+import streamlit as st
+import yfinance as yf
+import numpy as np
+import pandas as pd
+from math import ceil
+from datetime import datetime, timedelta
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from pandas.tseries.offsets import BDay
+st.set_page_config(page_title="Autocorrelation Periodogram", layout="wide")  # set the page title and request the "wide" page layout
+@st.cache_data(show_spinner=False)
+def run_analysis(ticker, start_date, end_date, length, max_lag,
+                 lags_per_plot, plot_start_lag, plot_end_lag, data_type):
+    df = yf.download(ticker, start=start_date, end=end_date,
+                     interval="1d", auto_adjust=True)
+    if df.empty:
+        return None, "No data available for the given inputs."
+    if isinstance(df.columns, pd.MultiIndex):
+        df.columns = df.columns.get_level_values(0)
+    else:
+        df.columns = [c.split("_")[0] for c in df.columns]
+    def ultimate_smoother(src, period):
+        a1 = np.exp(-1.414 * np.pi / period)
+        c2 = 2.0 * a1 * np.cos(1.414 * np.pi / period)
+        c3 = -a1 * a1
+        c1 = (1.0 + c2 - c3) / 4.0
+        n = len(src)
+        out = np.copy(src).astype(float)
+        for i in range(3, n):
+            out[i] = ((1.0 - c1) * src[i]
+                      + (2.0 * c1 - c2) * src[i-1]
+                      - (c1 + c3) * src[i-2]
+                      + c2 * out[i-1]
+                      + c3 * out[i-2])
+        return out
+    if data_type == "prices":
+        raw_series = df["Close"].values
+        data_series = ultimate_smoother(raw_series, length)
+    elif data_type == "returns":
+        prices = df["Close"].values
+        log_prices = np.log(prices)
+        data_series = np.diff(log_prices, prepend=np.nan)
+        data_series[0] = 0.0
+    elif data_type == "volatility":
+        prices = df["Close"].values
+        log_prices = np.log(prices)
+        returns = np.diff(log_prices, prepend=np.nan)
+        returns[0] = 0.0
+        vol_series = pd.Series(returns).rolling(window=length).std().to_numpy()
+        vol_series[:length-1] = 0.0
+        data_series = vol_series
+    else:
+        return None, "Invalid data type."
+    def compute_autocorrelation(series, window_length, max_lag):
+        n = len(series)
+        corrs = np.full((n, max_lag+1), np.nan, dtype=float)
+        for i in range(window_length - 1, n):
+            window = series[i - window_length + 1 : i + 1]
+            sum_x  = np.sum(window)
+            sum_xx = np.sum(window * window)
+            for L in range(max_lag + 1):
+                start_lag = i - window_length - L + 1
+                end_lag   = i - L + 1
+                if start_lag < 0:
+                    continue
+                window_lag = series[start_lag : end_lag]
+                if len(window_lag) != window_length:
+                    continue
+                sum_y  = np.sum(window_lag)
+                sum_yy = np.sum(window_lag * window_lag)
+                sum_xy = np.sum(window * window_lag)
+                denom_x = window_length * sum_xx - sum_x * sum_x
+                denom_y = window_length * sum_yy - sum_y * sum_y
+                if denom_x > 0 and denom_y > 0:
+                    numer = window_length * sum_xy - sum_x * sum_y
+                    corrs[i, L] = numer / np.sqrt(denom_x * denom_y)
+        return corrs
+    corrs = compute_autocorrelation(data_series, length, max_lag)
+    dates = df.index.to_pydatetime()
+    def slice_corr(corr_matrix, lag_start, lag_end):
+        subset = corr_matrix[:, lag_start : lag_end + 1]
+        return subset.T
+    plot_range = plot_end_lag - plot_start_lag + 1
+    n_plots = ceil(plot_range / lags_per_plot)
+    bucket_slices = []
+    for i in range(n_plots):
+        ls = plot_start_lag + i * lags_per_plot
+        le = min(plot_start_lag + (i+1) * lags_per_plot - 1, plot_end_lag)
+        subset = slice_corr(corrs, ls, le)
+        bucket_slices.append((ls, le, subset))
+    colorscale = [[0.0, 'red'], [0.5, 'yellow'], [1.0, 'green']]
+    total_rows = 1 + len(bucket_slices)
+    subplot_titles = [""]
+    for (ls, le, _) in bucket_slices:
+        subplot_titles.append(f"ACI {ls}–{le}")
+    fig = make_subplots(
+        rows=total_rows, cols=1,
+        shared_xaxes=True,
+        row_heights=[2] + [1]*len(bucket_slices),
+        vertical_spacing=0.03,
+        subplot_titles=subplot_titles
+    )
+    if data_type == "prices":
+        fig.add_trace(
+            go.Scatter(
+                x=dates,
+                y=df["Close"],
+                mode='lines',
+                line=dict(width=1.2),
+                name="Close Price"
+            ),
+            row=1, col=1
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=dates,
+                y=data_series,
+                mode='lines',
+                line=dict(width=1.2),
+                name="Smoothed Price"
+            ),
+            row=1, col=1
+        )
+    elif data_type == "returns":
+        fig.add_trace(
+            go.Scatter(
+                x=dates,
+                y=data_series,
+                mode='lines',
+                line=dict(width=1.2),
+                name="Log Returns"
+            ),
+            row=1, col=1
+        )
+    elif data_type == "volatility":
+        fig.add_trace(
+            go.Scatter(
+                x=dates,
+                y=data_series,
+                mode='lines',
+                line=dict(width=1.2),
+                name="Rolling Volatility"
+            ),
+            row=1, col=1
+        )
+    for idx, (ls, le, subset) in enumerate(bucket_slices):
+        row_index = idx + 2
+        show_colorbar = (idx == len(bucket_slices) - 1)
+        heatmap = go.Heatmap(
+            x=dates,
+            y=list(range(ls, le + 1)),
+            z=subset,
+            colorscale=colorscale,
+            zmin=-1,
+            zmax=1,
+            showscale=show_colorbar,
+            colorbar=dict(title="Correlation") if show_colorbar else None
+        )
+        fig.add_trace(heatmap, row=row_index, col=1)
+    latest_date = pd.Timestamp(df.index[-1])
+    for idx, (ls, le, _) in enumerate(bucket_slices):
+        row_number = idx + 2
+        tickvals = list(range(ls, le + 1))
+        ticktext = [f"{lag} ({(latest_date - BDay(lag)).strftime('%Y-%m-%d')})"
+                    for lag in tickvals]
+        fig.update_yaxes(
+            tickmode='array',
+            tickvals=tickvals,
+            ticktext=ticktext,
+            row=row_number,
+            tickfont=dict(size=8), #color="white",
+            col=1
+        )
+    fig.update_layout(
+        template="plotly_dark",
+        title=dict(text=f"Autocorrelation Indicator - {ticker} - {data_type.capitalize()}"),
+        height=800 + 200 * len(bucket_slices),
+        width=1600,
+        legend=dict(
+            orientation="h",
+            yanchor="bottom",
+            y=1.05,
+            xanchor="center",
+            x=0.5
+        )
+    )
+    fig.update_xaxes(
+        type="date",
+        tickangle=45,
+        tickformat="%Y-%m-%d"
+    )
+    return {"df": df,
+            "data_series": data_series,
+            "corrs": corrs,
+            "dates": dates,
+            "bucket_slices": bucket_slices,
+            "fig": fig}, None
+# Per-session store of analysis results; entries are added under the selected page name when "Run Analysis" is clicked.
+if "results" not in st.session_state:
+    st.session_state.results = {}
+# Sidebar radio that selects the page; its lower-cased label is what run_analysis receives as data_type.
+current_page = st.sidebar.radio("Select Page",
+                                options=["Prices", "Returns", "Volatility"],
+                                help="Choose analysis type.")
+st.sidebar.header("User Inputs")
+with st.sidebar.expander("Data Inputs", expanded=True):
+    ticker = st.text_input("Ticker", value="SPY", help="Enter the ticker symbol.")
+    start_date = st.date_input("Start Date", value=datetime(2020, 1, 1),
+                               help="Set the start date for daily data.")
+    default_end_date = datetime.today() + timedelta(days=1)  # default is tomorrow's date; NOTE(review): presumably so today's bar is included if the download end bound is exclusive — confirm
+    end_date = st.date_input("End Date", value=default_end_date,
+                             help="Set the end date for daily data.")
+with st.sidebar.expander("Methodology Parameters", expanded=True):
+    length = st.number_input(
+        "Window Size", value=20, min_value=1,
+        help="Controls how many days are used when comparing current vs past segments. Also used for smoothing (Prices) and rolling window in volatility."
+    )
+    lags_per_plot = st.number_input(
+        "Lags per Plot", value=32, min_value=1,
+        help="How many lag rows to include in each heatmap panel."
+    )
+    plot_start_lag = st.number_input(
+        "Plot Start Lag", value=30, min_value=0,
+        help="Lower bound of lag range to visualize. Set this to skip very short lags."
+    )
+    plot_end_lag = st.number_input(
+        "Plot End Lag", value=120, min_value=0,
+        help="Upper bound of lag range to visualize. The tool will measure similarity with up to this many days in the past."
+    )
+max_lag = plot_end_lag  # correlations are computed up to the last plotted lag; there is no separate max-lag input
+# Run Analysis button.
+if st.sidebar.button("Run Analysis"):
+    st.session_state.ticker = ticker
+    st.session_state.start_date = start_date
+    st.session_state.end_date = end_date
+    st.session_state.length = length
+    st.session_state.max_lag = max_lag
+    st.session_state.lags_per_plot = lags_per_plot
+    st.session_state.plot_start_lag = plot_start_lag
+    st.session_state.plot_end_lag = plot_end_lag
+    st.session_state.page = current_page
+    with st.spinner("Running analysis..."):
+        results, error = run_analysis(
+            ticker,
+            start_date,
+            end_date,
+            length,
+            max_lag,
+            lags_per_plot,
+            plot_start_lag,
+            plot_end_lag,
+            current_page.lower()
+        )
+    st.session_state.results[current_page] = (results, error)
+# Always show the main title and description
+# Always show the main title and intro
+st.title("Autocorrelation Periodogram")
+st.markdown(
+    "This tool visualizes how market structure repeats across time by computing rolling autocorrelations over many lags.\n\n"
+    "You can analyze **Prices**, **Returns**, or **Volatility**. The heatmaps show how much today’s behavior resembles the past at different time horizons."
+)
+# Methodology expander with math
+with st.expander("Methodology", expanded=False):
+    st.markdown("""
+**Purpose**
+Measure how similar the current behavior is to past behavior over multiple lags to detect persistence or reversion in structure.
+**Autocorrelation formula**:
+""")
+    st.latex(r"""
+\rho_{t, L} = \frac{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})(x_{t-L-i} - \bar{y})}
+                   {\sqrt{\sum_{i=0}^{N-1}(x_{t-i} - \bar{x})^2} \cdot
+                          \sqrt{\sum_{i=0}^{N-1}(x_{t-L-i} - \bar{y})^2}}
+""")
+    st.markdown("""
+- \( x \): current window
+- \( y \): lagged window shifted by \( L \) days
+- \( N \): window size (set via **Window Size**)
+- \( L \): lag (from 0 to **Max Lag**)
+**Inputs** (configured in sidebar):
+- **Window Size**: used for autocorrelation and volatility. Also used for smoothing in *Prices* mode.
+- **Max Lag**: upper bound on lag values to compute.
+- **Lags per Plot**: number of lag rows per heatmap.
+- **Plot Start / End Lag**: limits for lags to visualize.
+**Output**
+The app displays:
+- A top panel with the selected series.
+- One or more heatmaps below showing autocorrelation across lag ranges.
+- Color scale: green = positive correlation (momentum), red = negative correlation (mean reversion), yellow = no structure.
+""")
+# Show analysis results (if any)
+if current_page in st.session_state.results:
+    results, error = st.session_state.results[current_page]
+    st.markdown(f"### {current_page} Analysis")
+    if error:
+        st.error(error)
+    else:
+        lag_start = st.session_state.plot_start_lag
+        lag_end = st.session_state.plot_end_lag
+        lags_per_plot = st.session_state.lags_per_plot
+        n_panels = ceil((lag_end - lag_start + 1) / lags_per_plot)
+        if current_page.lower() == "prices":
+            st.markdown(f"""
+**Input type**: Closing prices (smoothed with Ehlers' filter)
+**Top panel**: Raw close vs smoothed price
+**Lower panels**: Autocorrelation of smoothed prices across {n_panels} lag bands
+**Lag range**: {lag_start} to {lag_end}
+**Window size**: {st.session_state.length}
+""")
+        elif current_page.lower() == "returns":
+            st.markdown(f"""
+**Input type**: Log returns
+**Top panel**: Daily log returns
+**Lower panels**: Autocorrelation of returns across {n_panels} lag bands
+**Lag range**: {lag_start} to {lag_end}
+**Window size**: {st.session_state.length}
+""")
+        elif current_page.lower() == "volatility":
+            st.markdown(f"""
+**Input type**: Rolling standard deviation of log returns
+**Top panel**: Rolling volatility
+**Lower panels**: Autocorrelation of volatility across {n_panels} lag bands
+**Lag range**: {lag_start} to {lag_end}
+**Window size**: {st.session_state.length}
+""")
+        st.plotly_chart(results["fig"], use_container_width=True)
+else:
+    #st.markdown("#### No analysis run yet")
+    st.info("Use the sidebar to set parameters and click **Run Analysis** to display results here.")
+# Hide default Streamlit chrome: inject a <style> block that sets #MainMenu and footer to visibility: hidden (raw HTML is enabled via unsafe_allow_html=True).
+st.markdown(
+    """
+    <style>
+    #MainMenu {visibility: hidden;}
+    footer {visibility: hidden;}
+    </style>
+    """,
+    unsafe_allow_html=True
+)