import streamlit as st import yfinance as yf import pandas as pd import numpy as np from pykalman import KalmanFilter import plotly.graph_objs as go import networkx as nx # Streamlit app setup st.set_page_config(layout="wide") st.title("Herding Behaviour Analysis in Financial Markets") st.markdown("This app analyzes herding behavior in financial markets by examining price movements and correlations.") # Sidebar: How to Use (closed by default) with st.sidebar.expander("How to Use", expanded=False): st.write( """ 1. Select the stock ticker or crypto pairs. 2. Choose the time period. 3. Set additional parameters for the analyses. 4. Click 'Run Analysis' to see the results. """ ) # Sidebar: Assets and Dates (open by default) with st.sidebar.expander("Assets and Dates", expanded=True): tickers = st.text_area("Asset Symbols (Crypto-Pair or Stock Ticker) (comma-separated)", value="BTC-USD,ETH-USD,BNB-USD,ADA-USD,SOL-USD,DOT-USD,DOGE-USD,AVAX-USD,MATIC-USD,LTC-USD,LUNA1-USD,LINK-USD,ALGO-USD,ATOM-USD,FTT-USD,TRX-USD,ETC-USD,FIL-USD,XMR-USD,XLM-USD", help="Enter the ticker symbols for the assets you want to analyze, separated by commas. E.g., 'BTC-USD, ETH-USD'.").split(",") start_date = st.date_input("Start Date", value=pd.to_datetime("2020-01-01"), help="Select the start date for the analysis period.") end_date = st.date_input("End Date", value=pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)), help="Select the end date for the analysis period.") # Sidebar: Market Index and Correlation (open by default) with st.sidebar.expander("Market Index and Correlation", expanded=True): market_index = st.text_input("Market Index Ticker", value="BTC-USD", help="Enter the ticker symbol for the market index, e.g., 'BTC-USD'.") correlation_threshold = st.slider("Correlation Threshold (for Network Analysis)", min_value=0.0, max_value=1.0, value=0.75, step=0.05, help="Set the threshold for correlation in the network analysis.") # Run Analysis button run_button = st.sidebar.button("Run Analysis") if run_button: # Include market index in the tickers list if not already present if market_index not in tickers: tickers.append(market_index) # Fetching Asset data data = yf.download(tickers, start=start_date, end=end_date)['Close'] # Clean the data by filling or dropping NaN and infinite values data = data.fillna(method='ffill').dropna() data = data.replace([np.inf, -np.inf], np.nan).dropna() if data.empty: st.warning("No data available for the given assets and date range.") else: st.markdown("### Asset Prices Reindexed to Start at 0") st.markdown("Reindexed asset prices to compare their relative movements over time.") data_reindexed = data.apply(lambda x: x / x.iloc[0]) fig = go.Figure() for ticker in tickers: fig.add_trace(go.Scatter(x=data_reindexed.index, y=data_reindexed[ticker], mode='lines', name=ticker)) fig.update_layout( title="Asset Prices Reindexed to Start at 0", xaxis_title="Date", yaxis_title="Reindexed Price", template="plotly_dark" ) st.plotly_chart(fig, use_container_width=True) # Calculating daily returns returns = data.pct_change().dropna() # Ensure no inf or NaN values in returns returns = returns.replace([np.inf, -np.inf], np.nan).dropna() if returns.empty or len(returns) <= 1: st.warning("Not enough data to perform further analysis.") else: st.markdown("### Kalman Filter: Estimated Common Factor and Asset Returns") st.markdown("Using the Kalman Filter to estimate a common factor influencing all asset returns.") # Methodology in expander with st.expander("Kalman Filter Methodology", expanded=False): st.markdown("The Kalman Filter operates based on the following state-space model:") st.latex(r""" \text{State Equation:} \quad \mathbf{x}_t = \mathbf{A} \mathbf{x}_{t-1} + \mathbf{w}_t """) st.latex(r""" \text{Observation Equation:} \quad \mathbf{y}_t = \mathbf{H} \mathbf{x}_t + \mathbf{v}_t """) st.markdown(""" Where: - \(xt\) is the state vector (the common factor we are estimating). - \(A\) is the state transition matrix (set to the identity matrix \(I\)). - \(wt\) is the process noise (with covariance \(Q\)). - \(yt\) is the observation vector (asset returns). - \(H\) is the observation matrix (set to a vector of ones). - \(vt\) is the observation noise (with covariance \(R\)). """) observations = returns.values initial_state_mean = np.zeros(1) kf = KalmanFilter( transition_matrices=np.eye(1), observation_matrices=np.ones((len(tickers), 1)), initial_state_mean=initial_state_mean, observation_covariance=np.eye(len(tickers)), transition_covariance=np.eye(1) ) state_means, state_covariances = kf.em(observations).filter(observations) fig = go.Figure() for i, ticker in enumerate(tickers): fig.add_trace(go.Scatter(x=returns.index, y=observations[:, i], mode='lines', name=f'{ticker} Returns')) fig.add_trace(go.Scatter(x=returns.index, y=state_means[:, 0], mode='lines', name='Estimated Common Factor', line=dict(color='red', width=4))) fig.update_layout( title='Kalman Filter: Estimated Common Factor and Asset Returns', xaxis_title='Date', yaxis_title='Returns', template='plotly_dark' ) st.plotly_chart(fig, use_container_width=True) st.markdown("### CSSD and CSAD Calculations") st.markdown("Calculating the Cross-Sectional Standard Deviation (CSSD) and Cross-Sectional Absolute Deviation (CSAD) of asset returns.") # Methodology in expander with st.expander("CSSD and CSAD Methodology", expanded=False): st.markdown("The formulas for CSSD and CSAD are as follows:") st.markdown("**CSSD (Cross-Sectional Standard Deviation):**") st.latex(r"\text{CSSD}_t = \sqrt{\frac{\sum_{i=1}^{N} (R_{i,t} - \overline{R}_t)^2}{N - 1}}") st.markdown("**CSAD (Cross-Sectional Absolute Deviation):**") st.latex(r"\text{CSAD}_t = \frac{\sum_{i=1}^{N} |R_{i,t} - \overline{R}_t|}{N}") market_return = returns[market_index] returns = returns.drop(columns=[market_index]) def calculate_cssd(returns, market_return): cssd = np.sqrt(((returns - market_return[:, None]) ** 2).sum(axis=1) / (returns.shape[1] - 1)) return cssd def calculate_csad(returns, market_return): csad = (np.abs(returns - market_return[:, None]).sum(axis=1)) / returns.shape[1] return csad cssd = calculate_cssd(returns.values, market_return.values) csad = calculate_csad(returns.values, market_return.values) window_size = 30 def rolling_csad(stock_returns, market_returns, window): csad_values = [] for i in range(len(stock_returns) - window + 1): window_data = stock_returns.iloc[i:i+window] window_market = market_returns[i:i+window].mean() N = len(window_data.columns) csad = (1 / N) * np.sum(np.abs(window_data.sub(window_market, axis=0)).mean(axis=1)) csad_values.append(csad) return pd.Series(csad_values, index=stock_returns.index[window-1:]) def rolling_cssd(stock_returns, market_returns, window): cssd_values = [] for i in range(len(stock_returns) - window + 1): window_data = stock_returns.iloc[i:i+window] window_market = market_returns[i:i+window].mean() N = len(window_data.columns) cssd = np.sqrt((1 / (N - 1)) * np.sum(np.square(window_data.sub(window_market, axis=0)).mean(axis=1))) cssd_values.append(cssd) return pd.Series(cssd_values, index=stock_returns.index[window-1:]) rolling_csad_values = rolling_csad(returns, market_return, window_size) rolling_cssd_values = rolling_cssd(returns, market_return, window_size) fig = go.Figure() fig.add_trace(go.Scatter(x=returns.index, y=cssd, mode='lines', name='CSSD', line=dict(color='blue'))) fig.add_trace(go.Scatter(x=returns.index, y=csad, mode='lines', name='CSAD', line=dict(color='red'))) fig.add_trace(go.Scatter(x=rolling_csad_values.index, y=rolling_csad_values, mode='lines', name='Rolling CSAD (30 days)', line=dict(color='orange'))) fig.add_trace(go.Scatter(x=rolling_cssd_values.index, y=rolling_cssd_values, mode='lines', name='Rolling CSSD (30 days)', line=dict(color='green'))) fig.update_layout( title='CSSD, CSAD, Rolling CSAD, and Rolling CSSD over Time', xaxis_title='Date', yaxis_title='Value', template='plotly_dark' ) st.plotly_chart(fig, use_container_width=True) st.markdown("### Network Visualization of Asset Correlations") st.markdown("Visualizing the correlations between assets as a network.") years = data.index.year.unique() def plot_network_for_year(data_for_year, year, threshold): corr_matrix = data_for_year.corr() G = nx.Graph() for ticker in tickers: G.add_node(ticker) for i in range(len(tickers)): for j in range(i+1, len(tickers)): if abs(corr_matrix.iloc[i, j]) > threshold: G.add_edge(tickers[i], tickers[j], weight=corr_matrix.iloc[i, j]) pos = nx.spring_layout(G) edge_trace = [] for edge in G.edges(data=True): x0, y0 = pos[edge[0]] x1, y1 = pos[edge[1]] trace = go.Scatter( x=[x0, x1, None], y=[y0, y1, None], line=dict(width=2, color='blue'), hoverinfo='none', mode='lines' ) edge_trace.append(trace) node_trace = go.Scatter( x=[pos[node][0] for node in G.nodes()], y=[pos[node][1] for node in G.nodes()], text=[node for node in G.nodes()], mode='markers+text', textposition='top center', hoverinfo='text', marker=dict( size=10, color='red', ) ) layout = go.Layout( title=f'Asset Correlation Network for {year}', showlegend=False, hovermode='closest', margin=dict(b=20, l=5, r=5, t=40), xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) fig = go.Figure(data=edge_trace + [node_trace], layout=layout) st.plotly_chart(fig, use_container_width=True) for year in sorted(years, reverse=True): # Reverse the order of years data_for_year = data[data.index.year == year] plot_network_for_year(data_for_year, year, correlation_threshold) hide_streamlit_style = """ """ st.markdown(hide_streamlit_style, unsafe_allow_html=True)