File size: 7,774 Bytes
2017ea2
 
 
 
 
 
 
e41bdd0
2017ea2
6c34806
2017ea2
76c40a6
 
 
 
 
2017ea2
76c40a6
 
2017ea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c34806
4df4a95
 
 
 
 
6c34806
4df4a95
 
822dbfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adec47d
 
 
 
6c34806
 
 
 
 
 
 
 
 
 
e41bdd0
6c34806
 
 
 
e41bdd0
6c34806
0e7ec56
4df4a95
b9356a3
76c40a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017ea2
306e37a
 
 
 
 
 
 
 
 
 
 
 
6c34806
2017ea2
 
 
 
 
 
76c40a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import streamlit as st
import yfinance as yf
import numpy as np
from ripser import Rips
import persim
import plotly.graph_objs as go
import warnings
import pandas as pd

# Function to fetch stock or crypto data
def fetch_data(ticker_name, start_date, end_date):
    """Download price history and derive log returns from adjusted closes.

    Args:
        ticker_name: Symbol passed to ``yf.download``.
        start_date: Start of the requested range (forwarded as ``start``).
        end_date: End of the requested range (forwarded as ``end``).

    Returns:
        A ``(adjusted_close, log_returns)`` tuple: the NaN-free 'Adj Close'
        series and an array of log price ratios between consecutive rows
        (one element shorter than the series).

    Raises:
        ValueError: If the download is empty or fewer than two adjusted
            closes remain after dropping NaNs.
    """
    # auto_adjust=False keeps the separate 'Adj Close' column used below.
    frame = yf.download(
        ticker_name,
        start=start_date,
        end=end_date,
        auto_adjust=False,
    )

    # Collapse multi-level column labels down to their first level.
    has_multi_level_columns = isinstance(frame.columns, pd.MultiIndex)
    if has_multi_level_columns:
        frame.columns = frame.columns.get_level_values(0)

    if frame.empty:
        raise ValueError(f"No data found for {ticker_name} from {start_date} to {end_date}")

    adjusted_close = frame['Adj Close'].dropna()
    # A log return needs a pair of consecutive prices.
    if len(adjusted_close) < 2:
        raise ValueError(f"Insufficient data points for {ticker_name}")

    price_values = adjusted_close.values
    later, earlier = price_values[1:], price_values[:-1]
    return adjusted_close, np.log(later / earlier)

# Function to compute Wasserstein distances
def compute_wasserstein_distances(log_returns, window_size, rips):
    """Compare adjacent sliding windows of returns via persistence diagrams.

    For every start offset ``i``, the windows ``[i, i + window_size)`` and
    ``[i + window_size, i + 2 * window_size)`` are each passed to
    ``rips.fit_transform`` as a column vector, and the diagrams at index 0
    of the two results are compared with ``persim.wasserstein``.

    Args:
        log_returns: One-dimensional sequence of returns; converted with
            ``np.asarray`` so lists and arrays are both accepted.
        window_size: Number of returns per window; must be at least 1.
        rips: Object exposing ``fit_transform(points)`` that returns an
            indexable collection of diagrams (the caller passes a
            ``ripser.Rips`` instance).

    Returns:
        Array of shape ``(n, 1)`` with ``n = len(log_returns) -
        2 * window_size + 1``. When the series is too short to hold two
        full windows, an empty ``(0, 1)`` array is returned.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size}")

    returns = np.asarray(log_returns)
    n = len(returns) - (2 * window_size) + 1
    if n <= 0:
        # Not enough data for a single pair of windows; a negative n would
        # otherwise make np.full fail with a "negative dimensions" error.
        return np.full((0, 1), np.nan)

    distances = np.full((n, 1), np.nan)
    for i in range(n):
        split = i + window_size
        # i < n guarantees both slices contain exactly window_size values.
        segment1 = returns[i:split].reshape(-1, 1)
        segment2 = returns[split:split + window_size].reshape(-1, 1)

        dgm1 = rips.fit_transform(segment1)
        dgm2 = rips.fit_transform(segment2)
        distances[i] = persim.wasserstein(dgm1[0], dgm2[0], matching=False)

    return distances

# Page-level setup: wide layout, then the title and introductory text.
st.set_page_config(layout="wide")

_intro_text = """
This application analyzes asset price data using Wasserstein distances to detect changes in price dynamics over time.
Wasserstein distances, derived from persistence diagrams in Topological Data Analysis (TDA), help identify significant shifts in asset price behaviors for both stocks and cryptocurrencies.
"""

st.title("Market Crash Analysis with Topology")
st.write(_intro_text)

# Collapsed-by-default section explaining the method and how to read results.
with st.expander("Wasserstein Distance Methodology", expanded=False):
    # Explanation of the Wasserstein Distance method
    st.subheader("Wasserstein Distance Methodology")
    st.write("""
    The Wasserstein distance is a measure from optimal transport theory, used here to compare distributions of log returns in different time windows.
    A high Wasserstein distance indicates a significant change in the price dynamics, which might suggest a market event or shift in investor sentiment.
    """)
    
    st.latex(r'''
    W(P, Q) = \inf_{\gamma \in \Pi(P, Q)} \mathbb{E}_{(x,y) \sim \gamma} [d(x, y)]
    ''')
    
    # Raw string so the LaTeX backslashes are not parsed as (invalid) Python
    # escape sequences; inline math uses $...$, the delimiter Streamlit's
    # markdown renderer recognises.
    st.write(r"""
    - Where $W(P, Q)$ is the Wasserstein distance between distributions $P$ and $Q$.
    - $d(x, y)$ is the distance between points $x$ and $y$.
    - $\gamma$ ranges over $\Pi(P, Q)$, the set of joint distributions with marginals $P$ and $Q$.
    """)
    
    # Interpretation of results
    st.subheader("Interpretation of Results")
    st.write("""
    **Wasserstein Distance Analysis:**
    The Wasserstein distance quantifies changes in the log returns of asset prices over time.
    A high distance indicates a significant shift in price dynamics, potentially due to a market event or a change in investor behavior.
    """)

# Sidebar: heading, usage notes, and the widgets whose values drive the run.
st.sidebar.title("\nInput Parameters\n")

_usage_notes = """
    **How to use this app:**
    1. Enter the stock or crypto ticker symbol (e.g., `^GSPC` for S&P 500 or `BTC-USD` for Bitcoin).
    2. Specify the start and end dates for the analysis period.
    3. Adjust the window size for the sliding window analysis.
    4. Set the alert threshold for detecting significant changes in price dynamics.
    5. Click 'Run Analysis' to start.
    """

with st.sidebar.expander("How to Use", expanded=False):
    st.write(_usage_notes)

with st.sidebar.expander("Input Parameters", expanded=True):
    ticker_name = st.text_input(
        'Enter Stock or Crypto Symbol (e.g., AAPL or BTC-USD)',
        '^GSPC',
        help="Enter the ticker symbol for the stock or cryptocurrency you want to analyze.",
    )
    start_date_string = st.date_input(
        'Start Date',
        pd.to_datetime('2020-01-01'),
        help="Select the start date for the data range.",
    )
    # Default end date is one day past today's date.
    _day_after_today = pd.Timestamp.now().date() + pd.Timedelta(days=1)
    end_date_string = st.date_input(
        'End Date',
        pd.to_datetime(_day_after_today),
        help="Select the end date for the data range.",
    )

with st.sidebar.expander("Parameters", expanded=True):
    window_size = st.slider(
        'Window Size',
        min_value=5,
        max_value=50,
        value=20,
        help="Set the window size for the sliding window analysis.",
    )
    threshold = st.slider(
        'Alert Threshold',
        min_value=0.02,
        max_value=0.2,
        value=0.075,
        step=0.005,
        help="Set the threshold for detecting significant changes in price dynamics.",
    )

# Runs the full pipeline when the sidebar button is pressed: download prices,
# compute window-to-window Wasserstein distances, then draw both charts.
if st.sidebar.button('Run Analysis'):
    # Top-level boundary: any failure is reported in the page via st.error.
    try:
        st.write(f"Analyzing {ticker_name} from {start_date_string} to {end_date_string} with window size {window_size} and threshold {threshold}")

        # Fetch data
        prices, log_returns = fetch_data(ticker_name, start_date_string, end_date_string)
        if len(log_returns) < 2 * window_size:
            raise ValueError(f"Insufficient data: Need at least {2 * window_size} log returns, got {len(log_returns)}")

        # compute_wasserstein_distances only reads the diagram at index 0, so
        # higher homology dimensions are not requested from the Rips complex.
        rips = Rips(maxdim=0)
        wasserstein_dists = compute_wasserstein_distances(log_returns, window_size, rips)

        # Trim window_size points from each end so prices line up one-to-one
        # with the distance array, then apply a single NaN mask to dates,
        # prices and distances so the three stay aligned.
        window_prices = prices.iloc[window_size:-window_size]
        flat_distances = wasserstein_dists.flatten()
        valid_mask = ~np.isnan(flat_distances)
        valid_dates = window_prices.index[valid_mask]
        valid_prices = window_prices.to_numpy()[valid_mask]
        valid_distances = flat_distances[valid_mask]

        # Alerts are the points whose distance exceeds the chosen threshold.
        alert_mask = valid_distances > threshold
        alert_dates = valid_dates[alert_mask]
        alert_values = valid_prices[alert_mask]

        # Plot price and alerts
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=valid_dates, y=valid_prices, mode='lines', name='Price'))
        fig.add_trace(go.Scatter(x=alert_dates, y=alert_values, mode='markers', name='Alert', marker=dict(color='red', size=8)))
        fig.update_layout(title=f'{ticker_name} Prices Over Time', xaxis_title='Date', yaxis_title='Price')
        st.plotly_chart(fig, use_container_width=True)

        # Plot Wasserstein distances
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=valid_dates, y=valid_distances, mode='lines', name='Wasserstein Distance', line=dict(color='blue', width=2)))
        fig.add_hline(y=threshold, line_dash='dash', line_color='red', annotation_text=f'Threshold: {threshold}', annotation_position='bottom right')
        fig.update_layout(title='Wasserstein Distances Over Time', xaxis_title='Date', yaxis_title='Wasserstein Distance')
        st.plotly_chart(fig, use_container_width=True)

        st.write("""
        **Plot Interpretation:**
        - The first plot shows the asset price over time with alerts marked in red.
        - The second plot displays the Wasserstein distances over time, with the threshold indicated by a dashed red line. Peaks above this line represent significant changes in price dynamics.
        """)
    except Exception as e:
        st.error(f"Error: {e}. Check ticker symbol, date range, or window size.")

# Inject custom CSS that sets the sidebar width.
_sidebar_width_style = """
    <style>
    /* Adjust the width of the sidebar */
    [data-testid="stSidebar"] {
        width: 500px;  /* Change this value to set the width you want */
    }
    </style>
    """
st.markdown(_sidebar_width_style, unsafe_allow_html=True)

# Inject CSS that hides Streamlit's default main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(body=hide_streamlit_style, unsafe_allow_html=True)