import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
from pykalman import KalmanFilter
import plotly.graph_objs as go
import networkx as nx

# Streamlit app setup: page chrome plus the sidebar widgets that collect the
# inputs (`tickers`, `start_date`, `end_date`, `market_index`,
# `correlation_threshold`, `run_button`) used by the analysis further down.
st.set_page_config(layout="wide")

st.title("Herding Behaviour Analysis in Financial Markets")

st.markdown("This app analyzes herding behavior in financial markets by examining price movements and correlations.")

# Sidebar: How to Use (closed by default)
with st.sidebar.expander("How to Use", expanded=False):
    st.write(
        """
        1. Select the stock ticker or crypto pairs.
        2. Choose the time period.
        3. Set additional parameters for the analyses.
        4. Click 'Run Analysis' to see the results.
        """
    )

# Sidebar: Assets and Dates (open by default)
with st.sidebar.expander("Assets and Dates", expanded=True):
    raw_tickers = st.text_area("Asset Symbols (Crypto-Pair or Stock Ticker) (comma-separated)", 
                               value="BTC-USD,ETH-USD,BNB-USD,ADA-USD,SOL-USD,DOT-USD,DOGE-USD,AVAX-USD,MATIC-USD,LTC-USD,LUNA1-USD,LINK-USD,ALGO-USD,ATOM-USD,FTT-USD,TRX-USD,ETC-USD,FIL-USD,XMR-USD,XLM-USD",
                               help="Enter the ticker symbols for the assets you want to analyze, separated by commas. E.g., 'BTC-USD, ETH-USD'.")
    # The help text itself suggests "BTC-USD, ETH-USD" (with a space), so a
    # plain split(",") would yield " ETH-USD". Trim each entry, upper-case it,
    # and drop blanks/duplicates while keeping the order the user typed.
    # NOTE(review): upper-casing assumes Yahoo symbols are case-insensitive
    # and that yfinance labels its columns in upper case - confirm.
    tickers = list(dict.fromkeys(
        symbol.strip().upper() for symbol in raw_tickers.split(",") if symbol.strip()
    ))
    start_date = st.date_input("Start Date", value=pd.to_datetime("2020-01-01"),
                               help="Select the start date for the analysis period.")
    # Default end date is tomorrow.
    end_date = st.date_input("End Date", value=pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)),
                             help="Select the end date for the analysis period.")

# Sidebar: Market Index and Correlation (open by default)
with st.sidebar.expander("Market Index and Correlation", expanded=True):
    # Normalised the same way as the asset symbols so the membership test
    # against `tickers` and later column look-ups use an identical spelling.
    market_index = st.text_input("Market Index Ticker", value="BTC-USD",
                                 help="Enter the ticker symbol for the market index, e.g., 'BTC-USD'.").strip().upper()
    correlation_threshold = st.slider("Correlation Threshold (for Network Analysis)", 
                                      min_value=0.0, max_value=1.0, value=0.75, step=0.05,
                                      help="Set the threshold for correlation in the network analysis.")

# Run Analysis button
run_button = st.sidebar.button("Run Analysis")

if run_button:
    # Normalise the symbols before they are used as download keys and column
    # labels: trim whitespace, upper-case, drop blanks and duplicates while
    # keeping the order entered by the user.
    # NOTE(review): upper-casing assumes yfinance labels columns in upper case.
    tickers = list(dict.fromkeys(
        symbol.strip().upper() for symbol in tickers if symbol.strip()
    ))
    market_index = market_index.strip().upper()

    # Include market index in the tickers list if not already present
    if market_index and market_index not in tickers:
        tickers.append(market_index)

    # Fetching Asset data (skip the request entirely when nothing was entered)
    if tickers:
        data = yf.download(tickers, start=start_date, end=end_date)['Close']
    else:
        data = pd.DataFrame()
    if isinstance(data, pd.Series):
        # NOTE(review): a single-symbol download may come back as a Series
        # depending on the yfinance version; the code below needs a DataFrame.
        data = data.to_frame(name=tickers[0])

    # A symbol with no usable prices would otherwise make the row-wise
    # dropna() below discard every row, so remove all-NaN columns first and
    # tell the user which symbols were left out.
    data = data.dropna(axis=1, how='all')
    missing = [symbol for symbol in tickers if symbol not in data.columns]
    if missing:
        st.warning(f"No price data was returned for: {', '.join(missing)}. These symbols are excluded from the analysis.")

    # Clean the data by filling or dropping NaN and infinite values
    data = data.ffill().dropna()
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    # From here on, label everything by the frame's own column order. The
    # downloaded columns are not guaranteed to follow the order in which the
    # symbols were entered, and positional look-ups (values[:, i], iloc[i, j])
    # must pair with the right name.
    tickers = list(data.columns)

    if data.empty:
        st.warning("No data available for the given assets and date range.")
    elif market_index not in data.columns:
        # Without the index column, returns[market_index] below would raise.
        st.warning(f"No data available for the market index '{market_index}'.")
    else:
        st.markdown("### Asset Prices Reindexed to Start at 0")
        st.markdown("Reindexed asset prices to compare their relative movements over time.")

        # Cumulative change relative to the first row, so every series really
        # starts at 0 as the heading and chart title state.
        data_reindexed = data / data.iloc[0] - 1
        fig = go.Figure()

        for ticker in data_reindexed.columns:
            fig.add_trace(go.Scatter(x=data_reindexed.index, y=data_reindexed[ticker], mode='lines', name=ticker))

        fig.update_layout(
            title="Asset Prices Reindexed to Start at 0",
            xaxis_title="Date",
            yaxis_title="Reindexed Price",
            template="plotly_dark"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Calculating daily returns
        returns = data.pct_change().dropna()

        # Ensure no inf or NaN values in returns
        returns = returns.replace([np.inf, -np.inf], np.nan).dropna()

        if returns.empty or len(returns) <= 1:
            st.warning("Not enough data to perform further analysis.")
        else:
            st.markdown("### Kalman Filter: Estimated Common Factor and Asset Returns")
            st.markdown("Using the Kalman Filter to estimate a common factor influencing all asset returns.")

            # Methodology in expander
            with st.expander("Kalman Filter Methodology", expanded=False):
                st.markdown("The Kalman Filter operates based on the following state-space model:")
                st.latex(r"""
                \text{State Equation:} \quad \mathbf{x}_t = \mathbf{A} \mathbf{x}_{t-1} + \mathbf{w}_t
                """)
                st.latex(r"""
                \text{Observation Equation:} \quad \mathbf{y}_t = \mathbf{H} \mathbf{x}_t + \mathbf{v}_t
                """)
                # Raw string with $...$ delimiters: the previous "\(xt\)" form
                # relied on invalid escape sequences and was not shown as math.
                st.markdown(r"""
                Where:
                - $\mathbf{x}_t$ is the state vector (the common factor we are estimating).
                - $\mathbf{A}$ is the state transition matrix (set to the identity matrix $\mathbf{I}$).
                - $\mathbf{w}_t$ is the process noise (with covariance $\mathbf{Q}$).
                - $\mathbf{y}_t$ is the observation vector (asset returns).
                - $\mathbf{H}$ is the observation matrix (set to a vector of ones).
                - $\mathbf{v}_t$ is the observation noise (with covariance $\mathbf{R}$).
                """)

            observations = returns.values
            # Size the model from the data actually observed rather than from
            # the length of the user-entered symbol list.
            n_assets = observations.shape[1]
            kf = KalmanFilter(
                transition_matrices=np.eye(1),
                observation_matrices=np.ones((n_assets, 1)),
                initial_state_mean=np.zeros(1),
                observation_covariance=np.eye(n_assets),
                transition_covariance=np.eye(1)
            )

            # Fit the model parameters with EM, then run the filter; only the
            # filtered state means are plotted.
            state_means, _ = kf.em(observations).filter(observations)
            fig = go.Figure()

            # Column i of `observations` belongs to returns.columns[i].
            for i, ticker in enumerate(returns.columns):
                fig.add_trace(go.Scatter(x=returns.index, y=observations[:, i], mode='lines', name=f'{ticker} Returns'))

            fig.add_trace(go.Scatter(x=returns.index, y=state_means[:, 0], mode='lines', name='Estimated Common Factor', line=dict(color='red', width=4)))
            fig.update_layout(
                title='Kalman Filter: Estimated Common Factor and Asset Returns',
                xaxis_title='Date',
                yaxis_title='Returns',
                template='plotly_dark'
            )
            st.plotly_chart(fig, use_container_width=True)

            st.markdown("### CSSD and CSAD Calculations")
            st.markdown("Calculating the Cross-Sectional Standard Deviation (CSSD) and Cross-Sectional Absolute Deviation (CSAD) of asset returns.")

            # Methodology in expander
            # NOTE(review): the formulas shown use the cross-sectional mean,
            # while the script passes the market-index return as the
            # reference series - confirm which one is intended.
            with st.expander("CSSD and CSAD Methodology", expanded=False):
                st.markdown("The formulas for CSSD and CSAD are as follows:")
                st.markdown("**CSSD (Cross-Sectional Standard Deviation):**")
                st.latex(r"\text{CSSD}_t = \sqrt{\frac{\sum_{i=1}^{N} (R_{i,t} - \overline{R}_t)^2}{N - 1}}")
                st.markdown("**CSAD (Cross-Sectional Absolute Deviation):**")
                st.latex(r"\text{CSAD}_t = \frac{\sum_{i=1}^{N} |R_{i,t} - \overline{R}_t|}{N}")

            # Split the market index off; `returns` keeps only the other assets.
            market_return = returns[market_index]
            returns = returns.drop(columns=[market_index])

            def calculate_cssd(returns, market_return):
                """Return the per-row cross-sectional standard deviation.

                `returns` is a 2-D array (rows x assets) and `market_return`
                a 1-D array with one reference value per row. For every row
                the squared deviations from that row's reference value are
                summed and divided by (number of assets - 1) before the
                square root is taken.
                """
                n_assets = returns.shape[1]
                # Turn the reference series into a column so it broadcasts
                # across the asset axis.
                deviations = returns - market_return[:, None]
                squared_total = np.square(deviations).sum(axis=1)
                return np.sqrt(squared_total / (n_assets - 1))

            def calculate_csad(returns, market_return):
                """Return the per-row cross-sectional absolute deviation.

                `returns` is a 2-D array (rows x assets) and `market_return`
                a 1-D array with one reference value per row. For every row
                the absolute deviations from that row's reference value are
                summed and divided by the number of assets.
                """
                n_assets = returns.shape[1]
                # Column-shaped reference so it broadcasts across the assets.
                abs_deviations = np.abs(returns - market_return[:, None])
                return abs_deviations.sum(axis=1) / n_assets

            # Unwrap the pandas objects once and hand the raw arrays to both
            # dispersion helpers.
            asset_matrix = returns.values
            market_vector = market_return.values
            cssd = calculate_cssd(asset_matrix, market_vector)
            csad = calculate_csad(asset_matrix, market_vector)

            # Number of consecutive rows per window for the rolling measures.
            window_size = 30

            def rolling_csad(stock_returns, market_returns, window):
                """Return the rolling mean of the per-row CSAD.

                For every row, CSAD is the mean over assets of
                |R_i - R_market|. The result is the average of that per-row
                CSAD over the trailing `window` rows, so it stays on the same
                scale as the un-smoothed CSAD whatever the window length or
                the number of assets. (The previous implementation divided by
                the asset count twice and never by the window length.)

                Args:
                    stock_returns: DataFrame of asset returns (rows x assets).
                    market_returns: Reference returns, one value per row of
                        `stock_returns` (a Series is aligned on the index).
                    window: Number of consecutive rows per window (>= 1).

                Returns:
                    Series indexed by the last row of each full window; empty
                    when there are fewer rows than `window`.

                Raises:
                    ValueError: If `window` is smaller than 1.
                """
                if window < 1:
                    raise ValueError("window must be a positive integer")
                # Row-by-row deviation from that same row's market return.
                daily_csad = stock_returns.sub(market_returns, axis=0).abs().mean(axis=1)
                # rolling() leaves the first window-1 entries as NaN; drop them.
                return daily_csad.rolling(window).mean().iloc[window - 1:]

            def rolling_cssd(stock_returns, market_returns, window):
                """Return the rolling mean of the per-row CSSD.

                For every row, CSSD is sqrt(sum_i (R_i - R_market)^2 / (N - 1))
                with N the number of assets. The result is the average of that
                per-row CSSD over the trailing `window` rows, so it stays on
                the same scale as the un-smoothed CSSD. (The previous
                implementation mixed a 1/N and a 1/(N-1) factor and never
                divided by the window length.)

                Args:
                    stock_returns: DataFrame of asset returns (rows x assets).
                    market_returns: Reference returns, one value per row of
                        `stock_returns` (a Series is aligned on the index).
                    window: Number of consecutive rows per window (>= 1).

                Returns:
                    Series indexed by the last row of each full window; empty
                    when there are fewer rows than `window`. With a single
                    asset the N - 1 denominator is zero, so values are
                    inf/NaN rather than an exception.

                Raises:
                    ValueError: If `window` is smaller than 1.
                """
                if window < 1:
                    raise ValueError("window must be a positive integer")
                n_assets = stock_returns.shape[1]
                # Row-by-row squared deviation from that same row's market return.
                squared_dev = stock_returns.sub(market_returns, axis=0) ** 2
                daily_cssd = np.sqrt(squared_dev.sum(axis=1) / (n_assets - 1))
                # rolling() leaves the first window-1 entries as NaN; drop them.
                return daily_cssd.rolling(window).mean().iloc[window - 1:]

            rolling_csad_values = rolling_csad(returns, market_return, window_size)
            rolling_cssd_values = rolling_cssd(returns, market_return, window_size)

            # One row per line on the chart: (x values, y values, legend
            # label, line colour), drawn in this order.
            dispersion_lines = [
                (returns.index, cssd, 'CSSD', 'blue'),
                (returns.index, csad, 'CSAD', 'red'),
                (rolling_csad_values.index, rolling_csad_values, 'Rolling CSAD (30 days)', 'orange'),
                (rolling_cssd_values.index, rolling_cssd_values, 'Rolling CSSD (30 days)', 'green'),
            ]

            fig = go.Figure()
            for x_values, y_values, label, colour in dispersion_lines:
                fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='lines', name=label, line=dict(color=colour)))
            fig.update_layout(
                title='CSSD, CSAD, Rolling CSAD, and Rolling CSSD over Time',
                xaxis_title='Date',
                yaxis_title='Value',
                template='plotly_dark'
            )
            st.plotly_chart(fig, use_container_width=True)

            st.markdown("### Network Visualization of Asset Correlations")
            st.markdown("Visualizing the correlations between assets as a network.")

            # Distinct calendar years present in the cleaned price data; one
            # network is drawn per year further down.
            years = data.index.year.unique()

            def plot_network_for_year(data_for_year, year, threshold):
                """Render the correlation network of `data_for_year` in Streamlit.

                Every column of `data_for_year` becomes a node. Two nodes are
                joined when the absolute pairwise correlation of their columns
                is strictly greater than `threshold`.

                Args:
                    data_for_year: DataFrame whose columns are the assets.
                    year: Value shown in the chart title.
                    threshold: Minimum absolute correlation for an edge.
                """
                # NOTE(review): the correlation is taken on whatever series
                # the caller passes; the caller in this script passes price
                # levels rather than returns - confirm that is intended.
                corr_matrix = data_for_year.corr()

                # Take labels from the matrix itself so that position (i, j)
                # always pairs with the right names, independent of the order
                # or length of any externally held symbol list.
                labels = list(corr_matrix.columns)

                G = nx.Graph()
                G.add_nodes_from(labels)
                for i, first in enumerate(labels):
                    for j in range(i + 1, len(labels)):
                        weight = corr_matrix.iat[i, j]
                        # NaN correlations compare False and add no edge.
                        if abs(weight) > threshold:
                            G.add_edge(first, labels[j], weight=weight)

                # Fixed seed keeps node positions stable across reruns.
                pos = nx.spring_layout(G, seed=42)

                # All edges go into one trace; a None entry after each pair
                # of endpoints breaks the line between segments.
                edge_x, edge_y = [], []
                for first, second in G.edges():
                    x0, y0 = pos[first]
                    x1, y1 = pos[second]
                    edge_x.extend([x0, x1, None])
                    edge_y.extend([y0, y1, None])

                edge_trace = go.Scatter(
                    x=edge_x,
                    y=edge_y,
                    line=dict(width=2, color='blue'),
                    hoverinfo='none',
                    mode='lines'
                )

                nodes = list(G.nodes())
                node_trace = go.Scatter(
                    x=[pos[node][0] for node in nodes],
                    y=[pos[node][1] for node in nodes],
                    text=nodes,
                    mode='markers+text',
                    textposition='top center',
                    hoverinfo='text',
                    marker=dict(
                        size=10,
                        color='red',
                    )
                )

                layout = go.Layout(
                    title=f'Asset Correlation Network for {year}',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                )

                fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
                st.plotly_chart(fig, use_container_width=True)

            # Draw one network per calendar year, most recent year first.
            for year in sorted(years, reverse=True):
                in_year = data.index.year == year
                data_for_year = data[in_year]
                plot_network_for_year(data_for_year, year, correlation_threshold)

# CSS injected on every run to hide the elements matched by the `#MainMenu`
# and `footer` selectors.
hide_streamlit_style = "\n".join([
    "",
    "<style>",
    "#MainMenu {visibility: hidden;}",
    "footer {visibility: hidden;}",
    "</style>",
    "",
])
st.markdown(hide_streamlit_style, unsafe_allow_html=True)