# Space21 / app.py
# QuantumLearner - "Update app.py" (commit ea2b8ba, verified)
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
from pykalman import KalmanFilter
import plotly.graph_objs as go
import networkx as nx
# Page-wide configuration and headline copy
st.set_page_config(layout="wide")
st.title("Herding Behaviour Analysis in Financial Markets")
st.markdown("This app analyzes herding behavior in financial markets by examining price movements and correlations.")

# Sidebar help panel; collapsed until the user opens it
_usage_steps = """
1. Select the stock ticker or crypto pairs.
2. Choose the time period.
3. Set additional parameters for the analyses.
4. Click 'Run Analysis' to see the results.
"""
with st.sidebar.expander("How to Use", expanded=False):
    st.write(_usage_steps)
# Sidebar: Assets and Dates (open by default)
with st.sidebar.expander("Assets and Dates", expanded=True):
    tickers_text = st.text_area(
        "Asset Symbols (Crypto-Pair or Stock Ticker) (comma-separated)",
        value="BTC-USD,ETH-USD,BNB-USD,ADA-USD,SOL-USD,DOT-USD,DOGE-USD,AVAX-USD,MATIC-USD,LTC-USD,LUNA1-USD,LINK-USD,ALGO-USD,ATOM-USD,FTT-USD,TRX-USD,ETC-USD,FIL-USD,XMR-USD,XLM-USD",
        help="Enter the ticker symbols for the assets you want to analyze, separated by commas. E.g., 'BTC-USD, ETH-USD'.",
    )
    # Normalise the free-text input: the help text itself suggests
    # "BTC-USD, ETH-USD" (with a space), so every entry is stripped and
    # upper-cased, blanks are skipped, and duplicates are removed while the
    # typed order is kept. `tickers` stays a plain list because later code
    # appends the market index to it.
    tickers = list(dict.fromkeys(
        symbol.strip().upper()
        for symbol in tickers_text.split(",")
        if symbol.strip()
    ))
    start_date = st.date_input("Start Date", value=pd.to_datetime("2020-01-01"),
                               help="Select the start date for the analysis period.")
    end_date = st.date_input("End Date", value=pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)),
                             help="Select the end date for the analysis period.")

# Sidebar: Market Index and Correlation (open by default)
with st.sidebar.expander("Market Index and Correlation", expanded=True):
    # Same normalisation as the asset list so the membership test
    # `market_index not in tickers` compares like with like.
    market_index = st.text_input("Market Index Ticker", value="BTC-USD",
                                 help="Enter the ticker symbol for the market index, e.g., 'BTC-USD'.").strip().upper()
    correlation_threshold = st.slider("Correlation Threshold (for Network Analysis)",
                                      min_value=0.0, max_value=1.0, value=0.75, step=0.05,
                                      help="Set the threshold for correlation in the network analysis.")

# Run Analysis button: True only on the rerun triggered by the click
run_button = st.sidebar.button("Run Analysis")
# Analysis entry point: download closing prices, clean them, chart the
# rebased series and derive daily returns.
# NOTE(review): the original indentation was lost; the statements that follow
# this block in the file presumably continue inside the inner `else` branch.
if run_button:
    # Include market index in the tickers list if not already present
    if market_index not in tickers:
        tickers.append(market_index)

    # Fetching Asset data
    data = yf.download(tickers, start=start_date, end=end_date)['Close']
    if isinstance(data, pd.Series):
        # Presumably only happens for a single symbol on some yfinance
        # versions; keep the rest of the pipeline frame-based.
        data = data.to_frame(name=tickers[0])

    # Drop symbols for which nothing was returned (delisted / mistyped).
    # Otherwise one all-NaN column makes the row-wise dropna() below
    # discard every row and the whole analysis reports "no data".
    data = data.dropna(axis=1, how='all')
    missing = [symbol for symbol in tickers if symbol not in data.columns]
    if missing:
        st.warning(f"No price data returned for: {', '.join(missing)}")

    # Clean the data by filling or dropping NaN and infinite values
    data = data.ffill().dropna()
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    # From here on, work with the symbols (and the column order) actually
    # present in the frame; the order the user typed is not guaranteed to
    # match the downloaded column order.
    tickers = list(data.columns)

    if data.empty:
        st.warning("No data available for the given assets and date range.")
    elif market_index not in data.columns:
        st.warning(f"No price data available for the market index '{market_index}'.")
    else:
        st.markdown("### Asset Prices Reindexed to Start at 0")
        st.markdown("Reindexed asset prices to compare their relative movements over time.")
        # Rebase every series on its first price and subtract 1 so each
        # line starts at 0, as the heading and chart title state.
        data_reindexed = data / data.iloc[0] - 1
        fig = go.Figure()
        for ticker in data_reindexed.columns:
            fig.add_trace(go.Scatter(x=data_reindexed.index, y=data_reindexed[ticker], mode='lines', name=ticker))
        fig.update_layout(
            title="Asset Prices Reindexed to Start at 0",
            xaxis_title="Date",
            yaxis_title="Reindexed Price",
            template="plotly_dark"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Calculating daily returns
        returns = data.pct_change().dropna()
        # Ensure no inf or NaN values in returns
        returns = returns.replace([np.inf, -np.inf], np.nan).dropna()
# Kalman-filter section: estimate one latent common factor from all asset
# returns and chart it against the individual return series.
if returns.empty or len(returns) <= 1:
    st.warning("Not enough data to perform further analysis.")
else:
    st.markdown("### Kalman Filter: Estimated Common Factor and Asset Returns")
    st.markdown("Using the Kalman Filter to estimate a common factor influencing all asset returns.")

    # Methodology in expander
    with st.expander("Kalman Filter Methodology", expanded=False):
        st.markdown("The Kalman Filter operates based on the following state-space model:")
        st.latex(r"""
\text{State Equation:} \quad \mathbf{x}_t = \mathbf{A} \mathbf{x}_{t-1} + \mathbf{w}_t
""")
        st.latex(r"""
\text{Observation Equation:} \quad \mathbf{y}_t = \mathbf{H} \mathbf{x}_t + \mathbf{v}_t
""")
        # Inline maths uses $...$ (rendered by st.markdown); the previous
        # "\(xt\)" form was an invalid string escape and showed up verbatim.
        st.markdown(r"""
Where:
- $x_t$ is the state vector (the common factor we are estimating).
- $A$ is the state transition matrix (set to the identity matrix $I$).
- $w_t$ is the process noise (with covariance $Q$).
- $y_t$ is the observation vector (asset returns).
- $H$ is the observation matrix (set to a vector of ones).
- $v_t$ is the observation noise (with covariance $R$).
""")

    observations = returns.values
    # Size the model from the data itself: the number of return columns can
    # differ from len(tickers) when a symbol is duplicated or has no data.
    n_assets = observations.shape[1]
    initial_state_mean = np.zeros(1)
    kf = KalmanFilter(
        transition_matrices=np.eye(1),
        observation_matrices=np.ones((n_assets, 1)),
        initial_state_mean=initial_state_mean,
        observation_covariance=np.eye(n_assets),
        transition_covariance=np.eye(1)
    )
    # Fit the model parameters with EM on the observations, then filter the
    # same observations to obtain the state estimate for every date.
    state_means, state_covariances = kf.em(observations).filter(observations)

    fig = go.Figure()
    # Take the names from returns.columns so each trace is labelled with the
    # column it actually plots (observations[:, i] is column i of `returns`).
    for i, ticker in enumerate(returns.columns):
        fig.add_trace(go.Scatter(x=returns.index, y=observations[:, i], mode='lines', name=f'{ticker} Returns'))
    fig.add_trace(go.Scatter(x=returns.index, y=state_means[:, 0], mode='lines', name='Estimated Common Factor', line=dict(color='red', width=4)))
    fig.update_layout(
        title='Kalman Filter: Estimated Common Factor and Asset Returns',
        xaxis_title='Date',
        yaxis_title='Returns',
        template='plotly_dark'
    )
    st.plotly_chart(fig, use_container_width=True)
# Section header for the dispersion measures, followed by the split of the
# return frame into the market series and the remaining assets.
st.markdown("### CSSD and CSAD Calculations")
st.markdown("Calculating the Cross-Sectional Standard Deviation (CSSD) and Cross-Sectional Absolute Deviation (CSAD) of asset returns.")

# Methodology in expander: one bold heading plus one formula per measure
_dispersion_formulas = (
    ("**CSSD (Cross-Sectional Standard Deviation):**",
     r"\text{CSSD}_t = \sqrt{\frac{\sum_{i=1}^{N} (R_{i,t} - \overline{R}_t)^2}{N - 1}}"),
    ("**CSAD (Cross-Sectional Absolute Deviation):**",
     r"\text{CSAD}_t = \frac{\sum_{i=1}^{N} |R_{i,t} - \overline{R}_t|}{N}"),
)
with st.expander("CSSD and CSAD Methodology", expanded=False):
    st.markdown("The formulas for CSSD and CSAD are as follows:")
    for _heading, _formula in _dispersion_formulas:
        st.markdown(_heading)
        st.latex(_formula)

# The market index column becomes the benchmark; every other column stays
# in `returns` as the asset cross-section.
market_return = returns[market_index]
returns = returns.drop(market_index, axis=1)
def calculate_cssd(returns, market_return):
    """Return the per-row cross-sectional standard deviation around the market return.

    For every row t: sqrt(sum_i (returns[t, i] - market_return[t])**2 / (N - 1)),
    where N is the number of columns in ``returns``.

    Args:
        returns: 2-D array-like (rows x assets); a NumPy array or a pandas
            DataFrame are both accepted.
        market_return: 1-D array-like with one benchmark value per row
            (NumPy array or pandas Series).

    Returns:
        1-D ``numpy.ndarray`` with one value per row. When fewer than two
        assets are supplied the N - 1 denominator is undefined, so every
        entry is NaN instead of the result of a division by zero.
    """
    # np.asarray lets pandas inputs through; `series[:, None]` is rejected
    # by pandas, so the column vector is built with reshape instead.
    asset_returns = np.asarray(returns, dtype=float)
    benchmark = np.asarray(market_return, dtype=float).reshape(-1, 1)
    n_assets = asset_returns.shape[1]
    if n_assets < 2:
        return np.full(asset_returns.shape[0], np.nan)
    squared_dev = (asset_returns - benchmark) ** 2
    return np.sqrt(squared_dev.sum(axis=1) / (n_assets - 1))
def calculate_csad(returns, market_return):
    """Return the per-row cross-sectional absolute deviation around the market return.

    For every row t: sum_i |returns[t, i] - market_return[t]| / N, where N is
    the number of columns in ``returns``.

    Args:
        returns: 2-D array-like (rows x assets); a NumPy array or a pandas
            DataFrame are both accepted.
        market_return: 1-D array-like with one benchmark value per row
            (NumPy array or pandas Series).

    Returns:
        1-D ``numpy.ndarray`` with one value per row; all NaN when
        ``returns`` has no columns (nothing to average).
    """
    # np.asarray lets pandas inputs through; `series[:, None]` is rejected
    # by pandas, so the column vector is built with reshape instead.
    asset_returns = np.asarray(returns, dtype=float)
    benchmark = np.asarray(market_return, dtype=float).reshape(-1, 1)
    n_assets = asset_returns.shape[1]
    if n_assets == 0:
        return np.full(asset_returns.shape[0], np.nan)
    return np.abs(asset_returns - benchmark).sum(axis=1) / n_assets
# Daily dispersion series plus the window length used by the rolling
# variants further down.
# NOTE(review): the original indentation was lost; these statements
# presumably run inside the analysis branch where `returns` and
# `market_return` are defined - confirm.
asset_return_matrix = returns.values
market_return_vector = market_return.values
cssd = calculate_cssd(asset_return_matrix, market_return_vector)
csad = calculate_csad(asset_return_matrix, market_return_vector)
window_size = 30  # rows per rolling window
def rolling_csad(stock_returns, market_returns, window):
    """Return the rolling cross-sectional absolute deviation of asset returns.

    For each window of ``window`` consecutive rows, the benchmark is the mean
    of ``market_returns`` over that window and the result is the mean absolute
    deviation from it across every (row, asset) cell of the window. The
    previous implementation divided by the number of assets twice and never by
    the window length, so its output scaled with ``window``.

    Args:
        stock_returns: pandas DataFrame of returns (rows x assets).
        market_returns: 1-D array-like aligned row-for-row with
            ``stock_returns`` (pandas Series or NumPy array).
        window: number of rows per window; must be at least 1.

    Returns:
        pandas Series indexed by the last row label of each window; empty
        when ``stock_returns`` has fewer than ``window`` rows.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    # Work on positional NumPy data so the slices never depend on how the
    # index labels happen to be typed.
    assets = stock_returns.to_numpy(dtype=float)
    market = np.asarray(market_returns, dtype=float)
    n_windows = max(len(assets) - window + 1, 0)
    csad_values = []
    for start in range(n_windows):
        stop = start + window
        benchmark = market[start:stop].mean()
        csad_values.append(np.abs(assets[start:stop] - benchmark).mean())
    return pd.Series(csad_values, index=stock_returns.index[window - 1:], dtype=float)
def rolling_cssd(stock_returns, market_returns, window):
    """Return the rolling cross-sectional standard deviation of asset returns.

    For each window of ``window`` consecutive rows, the benchmark is the mean
    of ``market_returns`` over that window. Each row's cross-sectional
    variance is sum_i (r_i - benchmark)**2 / (N - 1); the result is the square
    root of the average of those row variances over the window. The previous
    implementation divided by N and by N - 1 but never by the window length,
    so its output scaled with ``window``.

    Args:
        stock_returns: pandas DataFrame of returns (rows x assets).
        market_returns: 1-D array-like aligned row-for-row with
            ``stock_returns`` (pandas Series or NumPy array).
        window: number of rows per window; must be at least 1.

    Returns:
        pandas Series indexed by the last row label of each window; empty
        when ``stock_returns`` has fewer than ``window`` rows, and all NaN
        when fewer than two assets are supplied (N - 1 would be zero).

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    result_index = stock_returns.index[window - 1:]
    n_assets = stock_returns.shape[1]
    if n_assets < 2:
        return pd.Series(np.nan, index=result_index, dtype=float)
    # Work on positional NumPy data so the slices never depend on how the
    # index labels happen to be typed.
    assets = stock_returns.to_numpy(dtype=float)
    market = np.asarray(market_returns, dtype=float)
    n_windows = max(len(assets) - window + 1, 0)
    cssd_values = []
    for start in range(n_windows):
        stop = start + window
        benchmark = market[start:stop].mean()
        row_variance = ((assets[start:stop] - benchmark) ** 2).sum(axis=1) / (n_assets - 1)
        cssd_values.append(np.sqrt(row_variance.mean()))
    return pd.Series(cssd_values, index=result_index, dtype=float)
# Rolling dispersion series, the combined dispersion chart, and the header of
# the network section.
# NOTE(review): the original indentation was lost; these statements
# presumably run inside the analysis branch - confirm.
rolling_csad_values = rolling_csad(returns, market_return, window_size)
rolling_cssd_values = rolling_cssd(returns, market_return, window_size)

# (x values, y values, legend label, line colour), in drawing order
dispersion_lines = [
    (returns.index, cssd, 'CSSD', 'blue'),
    (returns.index, csad, 'CSAD', 'red'),
    (rolling_csad_values.index, rolling_csad_values, 'Rolling CSAD (30 days)', 'orange'),
    (rolling_cssd_values.index, rolling_cssd_values, 'Rolling CSSD (30 days)', 'green'),
]
fig = go.Figure()
for line_x, line_y, line_label, line_colour in dispersion_lines:
    fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name=line_label, line=dict(color=line_colour)))
fig.update_layout(
    title='CSSD, CSAD, Rolling CSAD, and Rolling CSSD over Time',
    xaxis_title='Date',
    yaxis_title='Value',
    template='plotly_dark',
)
st.plotly_chart(fig, use_container_width=True)

st.markdown("### Network Visualization of Asset Correlations")
st.markdown("Visualizing the correlations between assets as a network.")
# Distinct calendar years present in the cleaned price index
years = data.index.year.unique()
def plot_network_for_year(data_for_year, year, threshold):
    """Render one year's asset-correlation network as a Plotly chart.

    Every column of ``data_for_year`` becomes a node; two nodes are linked
    when the absolute value of their pairwise correlation exceeds
    ``threshold``. The chart is written to the page with ``st.plotly_chart``.

    Node and edge labels come from the correlation matrix's own columns. The
    previous version indexed the matrix by position in the global ``tickers``
    list, which mislabels edges whenever the frame's column order differs from
    that list and fails when a listed symbol has no column.

    Args:
        data_for_year: pandas DataFrame restricted to one year, one column
            per asset.
        year: value shown in the chart title.
        threshold: minimum absolute correlation for an edge.

    Returns:
        None.
    """
    # NOTE(review): the caller passes the cleaned price frame, so these are
    # correlations of price levels rather than of returns - confirm intent.
    corr_matrix = data_for_year.corr()
    symbols = list(corr_matrix.columns)

    G = nx.Graph()
    for symbol in symbols:
        G.add_node(symbol)
    for i, first in enumerate(symbols):
        for j in range(i + 1, len(symbols)):
            weight = corr_matrix.iloc[i, j]
            # A NaN correlation compares False here, so it never adds an edge.
            if abs(weight) > threshold:
                G.add_edge(first, symbols[j], weight=weight)

    pos = nx.spring_layout(G)

    # All edges share one style, so draw them as a single trace; the None
    # entries break the line between consecutive segments.
    edge_x = []
    edge_y = []
    for start_node, end_node in G.edges():
        x0, y0 = pos[start_node]
        x1, y1 = pos[end_node]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=2, color='blue'),
        hoverinfo='none',
        mode='lines'
    )

    node_trace = go.Scatter(
        x=[pos[node][0] for node in G.nodes()],
        y=[pos[node][1] for node in G.nodes()],
        text=[node for node in G.nodes()],
        mode='markers+text',
        textposition='top center',
        hoverinfo='text',
        marker=dict(
            size=10,
            color='red',
        )
    )

    layout = go.Layout(
        title=f'Asset Correlation Network for {year}',
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    )
    fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
    st.plotly_chart(fig, use_container_width=True)
# One correlation network per calendar year, most recent year first.
# NOTE(review): the original indentation was lost; this loop presumably runs
# inside the analysis branch where `data` and `years` are defined - confirm.
for year in sorted(years, reverse=True):
    in_year = data.index.year == year
    data_for_year = data.loc[in_year]
    plot_network_for_year(data_for_year, year, correlation_threshold)
# CSS that hides Streamlit's main menu and footer; injected as raw HTML.
hide_streamlit_style = "\n".join([
    "",
    "<style>",
    "#MainMenu {visibility: hidden;}",
    "footer {visibility: hidden;}",
    "</style>",
    "",
])
st.markdown(hide_streamlit_style, unsafe_allow_html=True)