import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from pandas_datareader.data import get_data_fred
from datetime import datetime, timedelta
import statsmodels.api as sm
import plotly.figure_factory as ff
import warnings
import plotly.colors as pc
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer

# Silence all Python warnings for the remainder of the script.
warnings.filterwarnings("ignore")

# Page-level configuration: wide layout and the page title.
st.set_page_config(
    page_title="U.S Treasury Yield Curve Analysis",
    layout="wide",
)

# Sidebar heading
st.sidebar.title("Input Parameters")

# Usage instructions, collapsed until the user opens them
usage_panel = st.sidebar.expander("How to Use", expanded=False)
with usage_panel:
    st.write("""
    1. Set the start and end dates for the analysis.
    2. Click "Run Analysis" to generate the yield curve analysis.
    3. Explore various sections for dynamic interpretation, PCA analysis, and more.
    """)

# Date-range inputs, shown expanded; the end date defaults to tomorrow.
default_start = datetime(2023, 1, 1)
default_end = datetime.now().date() + timedelta(days=1)
with st.sidebar.expander("Dates Specification", expanded=True):
    start_date = st.date_input(
        "Start Date",
        default_start,
        help="Select the start date for the analysis.",
    )
    end_date = st.date_input(
        "End Date",
        default_end,
        help="Select the end date for the analysis.",
    )
# Series identifiers requested from FRED, paired with the maturity name shown
# in charts. The order (shortest to longest maturity) is the order of both
# `tickers` and `labels`.
_MATURITIES = (
    ('DGS1MO', '1 Month'),
    ('DGS3MO', '3 Months'),
    ('DGS6MO', '6 Months'),
    ('DGS1', '1 Year'),
    ('DGS2', '2 Years'),
    ('DGS3', '3 Years'),
    ('DGS5', '5 Years'),
    ('DGS7', '7 Years'),
    ('DGS10', '10 Years'),
    ('DGS20', '20 Years'),
    ('DGS30', '30 Years'),
)
tickers = [series_id for series_id, _ in _MATURITIES]
labels = dict(_MATURITIES)

# Introductory text rendered under the main page title
overview_text = """
The U.S. Treasury yield curve is a graph that plots the yields of Treasury securities at fixed maturities.
Yield curves help understand the term structure of interest rates for assessing economic conditions.
This tool analyzes the yield curve, yield spreads, and trends to provide insights into the economic outlook.
"""
st.title("Yield Curve Analysis")
st.write(overview_text)


# Sidebar summary of the charts produced by the analysis, collapsed by default
key_visualizations_text = """
    Key Visualizations:
    - **Time Series of Treasury Yields**: Tracks the movement of yields across different maturities over time.
    - **Yield Curve Slope Interpretation**: Assesses the economic outlook based on the slope of the yield curve.
    - **Yield Spreads Over Time**: Highlights differences between yields on various maturities to predict economic changes.
    - **3D Surface Plot of Yield Curve Over Time**: Provides a dynamic 3D view of how the yield structure evolves over time.
    - **Correlation Heatmap of Treasury Yields**: Shows relationships between different maturities.
    - **Markov Switching Model Analysis**: Identifies different economic regimes using yield spread data.
    - **Principal Component Analysis (PCA)**: Extracts key factors driving yield curve variations.
    """
with st.sidebar.expander("Key Visualizations:", expanded=False):
    st.write(key_visualizations_text)

if st.sidebar.button("Run Analysis"):
    # Reject an empty or reversed date window before making a network request.
    if start_date >= end_date:
        st.error("The start date must be earlier than the end date.")
        st.stop()

    # Download all maturities from FRED. A failure is reported in the page
    # instead of surfacing as a raw traceback.
    try:
        yield_data = get_data_fred(tickers, start=start_date, end=end_date)
    except Exception as fetch_error:  # UI boundary: show the message and halt this run
        st.error(f"Could not download Treasury yields from FRED: {fetch_error}")
        st.stop()

    # Keep plain dates (no time component) as the index.
    yield_data.index = yield_data.index.date

    # SimpleImputer drops columns that contain no observed value at all, which
    # would make its output narrower than `yield_data.columns` and break the
    # DataFrame construction below. An empty download is caught by the same
    # check because every column is then "all missing".
    series_without_data = [
        labels.get(ticker, ticker)
        for ticker in yield_data.columns
        if yield_data[ticker].isna().all()
    ]
    if series_without_data:
        st.error(
            "No yield data is available in the selected period for: "
            + ", ".join(series_without_data)
            + ". Please choose a different date range."
        )
        st.stop()

    # Handle missing values by imputing each column with its mean
    imputer = SimpleImputer(strategy='mean')
    yield_data_imputed = pd.DataFrame(imputer.fit_transform(yield_data), columns=yield_data.columns, index=yield_data.index)

    # Plot the time series for each term using Plotly (one line per maturity)
    st.subheader("U.S. Treasury Yield Curve Time Series")
    fig = go.Figure()
    for ticker in tickers:
        fig.add_trace(go.Scatter(x=yield_data_imputed.index, y=yield_data_imputed[ticker], mode='lines', name=labels[ticker]))

    fig.update_layout(
        title='U.S. Treasury Yield Curve Time Series',
        xaxis_title='Date',
        yaxis_title='Yield (%)',
        legend_title_text='Maturity'
    )
    st.plotly_chart(fig)

    # Dynamic interpretation: extremes and grand mean over all maturities and dates
    max_yield = yield_data_imputed.max().max()
    min_yield = yield_data_imputed.min().min()
    mean_yield = yield_data_imputed.mean().mean()

    st.write(f"The highest yield observed in the time series is {max_yield:.2f}%.")
    st.write(f"The lowest yield observed in the time series is {min_yield:.2f}%.")
    st.write(f"The average yield across all maturities and dates is {mean_yield:.2f}%.")

    # Curve steepness per date: 30-year yield minus 1-month yield
    slope_indicator = yield_data_imputed['DGS30'].sub(yield_data_imputed['DGS1MO'])
    average_slope = slope_indicator.mean()

    with st.expander("Yield Curve Slope and Trend Interpretation", expanded=False):
        st.subheader("Yield Curve Slope Interpretation")
        st.write("The slope of the yield curve is an important indicator of economic expectations:")
        st.latex(r'''
        \text{Slope} = \text{Yield}_{30\text{Year}} - \text{Yield}_{1\text{Month}}
        ''')

        # A strictly positive average slope is reported as upward; anything else as downward.
        slope_message = (
            "On average, the yield curve shows an upward slope, which often indicates positive economic growth expectations."
            if average_slope > 0
            else "On average, the yield curve shows a downward slope, which may suggest a potential economic slowdown or recession."
        )
        st.write(slope_message)

        # Mean day-over-day change per maturity, then a count of rising versus falling maturities
        trend = yield_data_imputed.diff().mean()
        positive_trend_count = trend.gt(0).sum()
        negative_trend_count = trend.lt(0).sum()

        st.subheader("Yield Curve Trend Interpretation")
        st.write("Analyzing the trend of the yield curve helps in understanding the direction of interest rates:")
        trend_message = (
            "Overall, yields are increasing over time, indicating rising interest rates."
            if positive_trend_count > negative_trend_count
            else "Overall, yields are decreasing over time, indicating falling interest rates."
        )
        st.write(trend_message)

    # Spread definitions: display name -> (longer-maturity series, shorter-maturity series)
    spread_legs = {
        '10Y-2Y': ('DGS10', 'DGS2'),
        '10Y-3M': ('DGS10', 'DGS3MO'),
        '5Y-2Y': ('DGS5', 'DGS2'),
        '30Y-10Y': ('DGS30', 'DGS10'),
        '7Y-1Y': ('DGS7', 'DGS1'),
        '10Y-1Y': ('DGS10', 'DGS1'),
    }

    # Calculate the spreads. Yields are in percent, so a plain difference is in
    # percentage points; multiply by 100 so the values really are the basis
    # points that the axis title and the text below announce.
    spreads = pd.DataFrame({
        spread_name: (yield_data_imputed[long_leg] - yield_data_imputed[short_leg]) * 100
        for spread_name, (long_leg, short_leg) in spread_legs.items()
    })

    # Plot the spreads over time using Plotly
    st.subheader("Yield Spreads Over Time")

    fig2 = go.Figure()
    for spread in spreads.columns:
        fig2.add_trace(go.Scatter(x=spreads.index, y=spreads[spread], mode='lines', name=spread))

    fig2.update_layout(
        title='Yield Spreads Over Time',
        xaxis_title='Date',
        yaxis_title='Spread (bps)',
        legend_title_text='Spread'
    )
    st.plotly_chart(fig2)

    # Commentary per spread: name -> (text when the mean is positive, text otherwise)
    benchmark_notes = (
        "This suggests that investors expect higher yields for longer-term bonds compared to shorter-term bonds, which is typical in a healthy, growing economy.",
        "This suggests that investors expect lower yields for longer-term bonds compared to shorter-term bonds, often seen as a warning sign of an impending recession. This is known as a yield inversion.",
    )
    long_end_notes = (
        "A positive 30Y-10Y spread indicates that long-term yields are higher than medium-term yields, reflecting stable long-term growth expectations.",
        "A negative 30Y-10Y spread suggests that long-term economic growth expectations are lower than medium-term expectations, potentially signaling long-term economic concerns. This can also indicate a yield inversion.",
    )
    belly_notes = (
        "Positive spreads here indicate that yields increase with maturity, reflecting investor confidence in steady economic growth.",
        "Negative spreads here indicate that yields decrease with maturity, reflecting investor pessimism about future economic conditions. This situation is also referred to as a yield inversion.",
    )
    spread_notes = {
        '10Y-2Y': benchmark_notes,
        '10Y-3M': benchmark_notes,
        '30Y-10Y': long_end_notes,
        '5Y-2Y': belly_notes,
        '7Y-1Y': belly_notes,
        '10Y-1Y': belly_notes,
    }

    # Dynamic interpretation for spreads
    with st.expander("Interpretation of Yield Spreads", expanded=False):
        st.subheader("Interpretation of Yield Spreads")
        st.write("""
        Yield spreads are the differences between yields on different maturities of bonds. They are often used to predict economic changes:
        """)
        spread_stats = spreads.describe()
        st.write(spread_stats)

        for spread in spreads.columns:
            max_spread = spreads[spread].max()
            min_spread = spreads[spread].min()
            mean_spread = spreads[spread].mean()

            st.write(f"\nFor {spread}:")
            st.write(f"The highest spread observed is {max_spread:.2f} basis points.")
            st.write(f"The lowest spread observed is {min_spread:.2f} basis points.")
            st.write(f"The average spread over the period is {mean_spread:.2f} basis points.")

            # A strictly positive mean counts as positive; zero falls in the negative branch.
            is_positive = mean_spread > 0
            sign_word = "positive" if is_positive else "negative"
            st.write(f"On average, the {spread} spread is {sign_word}.")

            notes = spread_notes.get(spread)
            if notes is not None:
                st.write(notes[0] if is_positive else notes[1])
    
    # Pairwise correlation between the yield series of the maturities
    st.subheader("Correlation Heatmap of U.S. Treasury Yields")
    correlation_matrix = yield_data_imputed.corr()

    # Annotated Plotly heatmap; both axes carry the maturity display names and
    # each cell shows the coefficient rounded to two decimals.
    maturity_names = list(labels.values())
    heatmap_options = dict(
        z=correlation_matrix.values,
        x=maturity_names,
        y=list(maturity_names),
        annotation_text=correlation_matrix.round(2).values,
        colorscale='RdBu',
        reversescale=True,
        showscale=True,
    )
    fig3 = ff.create_annotated_heatmap(**heatmap_options)

    heatmap_layout = {
        'title': "Correlation Heatmap of U.S. Treasury Yields",
        'xaxis_title': "Maturity",
        'yaxis_title': "Maturity",
    }
    fig3.update_layout(**heatmap_layout)
    st.plotly_chart(fig3)

    # Prepare data for Plotly: move the dates from the index into a 'DATE'
    # column (in place) and build the long-format frame used by later charts.
    yield_data_imputed.reset_index(inplace=True)
    yield_data_imputed.rename(columns={'index': 'DATE'}, inplace=True)
    yield_data_long = yield_data_imputed.melt(id_vars='DATE', var_name='Maturity', value_name='Yield')

    # Build the surface grid straight from the wide frame so that the maturity
    # order of the data and of the axis labels cannot diverge (a pivot of the
    # long frame would sort the series names alphabetically).
    maturity_columns = [column for column in yield_data_imputed.columns if column != 'DATE']
    # go.Surface reads z[i][j] as the height at (x[j], y[i]); with dates on x
    # and maturities on y the grid must be (maturities x dates), hence the
    # transpose.
    z_data = yield_data_imputed[maturity_columns].to_numpy().T
    x_data = yield_data_imputed['DATE'].to_numpy()
    y_data = [labels[column] for column in maturity_columns]

    # Create a 3D surface plot for the yield curve over time
    st.subheader("3D Surface Plot of U.S. Treasury Yield Curve Over Time")
    fig4 = go.Figure(data=[go.Surface(
        z=z_data,
        x=x_data,
        y=y_data,
        colorscale='Viridis',
        contours={
            # White contour lines every 0.5 across the full range of z
            "z": {"show": True, "start": z_data.min(), "end": z_data.max(), "size": 0.5, "color": "white"},
        }
    )])

    fig4.update_layout(
        title='3D Surface Plot of U.S. Treasury Yield Curve Over Time',
        scene=dict(
            xaxis_title='Date',
            yaxis_title='Maturity',
            zaxis_title='Yield (%)',
            yaxis=dict(type='category'),
            xaxis=dict(type='date'),
            camera=dict(
                eye=dict(x=-1.25, y=-1.25, z=0.5)  # Adjust this to change the angle
            )
        ),
        margin=dict(l=0, r=0, b=0, t=40),  # Adjust margins for better fit
        height=700  # Adjust height as needed
    )

    st.plotly_chart(fig4)

    # Additional analysis using Plotly Express with a custom color sequence.
    # Maturities become an ordered categorical so that sorting follows the
    # order of `labels` (shortest to longest) rather than alphabetical order.
    yield_data_long['Maturity'] = pd.Categorical(yield_data_long['Maturity'], categories=list(labels.keys()), ordered=True)
    yield_data_long['Yield'] = pd.to_numeric(yield_data_long['Yield'])
    yield_data_long.sort_values(['DATE', 'Maturity'], inplace=True)

    # One Viridis color per date, sampled evenly along the scale
    num_dates = yield_data_long['DATE'].nunique()
    color_scale = pc.sample_colorscale('Viridis', [n / num_dates for n in range(num_dates)])

    fig5 = px.line(yield_data_long, x='Maturity', y='Yield', color='DATE',
                   title='Interactive U.S. Treasury Yield Curve',
                   labels={'Yield': 'Yield (%)', 'Maturity': 'Maturity'},
                   color_discrete_sequence=color_scale)

    # Show the readable maturity names instead of the series identifiers
    fig5.update_layout(
        xaxis=dict(
            tickvals=list(labels.keys()),
            ticktext=list(labels.values())
        )
    )
    st.plotly_chart(fig5)

    # Dynamic interpretation for the interactive yield curve
    with st.expander("Dynamic Interpretation for the Interactive Yield Curve", expanded=False):
        st.write("Dynamic Interpretation of Interactive U.S. Treasury Yield Curve:")
        # Per date: mean step between neighbouring maturities (rows are already
        # sorted by maturity within each date). Selecting the 'Yield' column
        # keeps the grouping column out of the applied function.
        slope_analysis = yield_data_long.groupby('DATE')['Yield'].apply(lambda yields: yields.diff().mean())

        positive_slope_dates = slope_analysis[slope_analysis > 0].index
        negative_slope_dates = slope_analysis[slope_analysis < 0].index

        if len(positive_slope_dates) > len(negative_slope_dates):
            st.write("Most of the time, the yield curve has an upward slope, indicating positive economic growth expectations.")
        else:
            st.write("Most of the time, the yield curve has a downward slope, suggesting economic slowdown or recession concerns.")

        st.write("\nPractical Insights:")
        st.write("1. **Positive Slope (Normal Yield Curve)**: Indicates investor confidence in economic growth and rising interest rates. Long-term yields are higher than short-term yields.")
        st.write("2. **Negative Slope (Inverted Yield Curve)**: Often seen as a predictor of recession. Investors expect lower yields in the long term due to economic uncertainty or expected downturn.")
        st.write("3. **Flat or Humped Curve**: Indicates transitional phases in the economy. Investors may be uncertain about future growth or inflation.")

        # The matching dates need not be consecutive, so report how many there
        # are together with the first and last one rather than implying an
        # uninterrupted range.
        st.write("\nKey Observations:")
        total_dates = len(slope_analysis)
        if len(positive_slope_dates) > 0:
            st.write(f"The yield curve was upward sloping on {len(positive_slope_dates)} of {total_dates} dates, first on {positive_slope_dates[0]} and last on {positive_slope_dates[-1]}.")
        if len(negative_slope_dates) > 0:
            st.write(f"The yield curve was downward sloping on {len(negative_slope_dates)} of {total_dates} dates, first on {negative_slope_dates[0]} and last on {negative_slope_dates[-1]}.")

    # Calculate the 10Y-2Y spread (in percentage points, like the yields)
    yield_data_imputed['10Y-2Y Spread'] = yield_data_imputed['DGS10'] - yield_data_imputed['DGS2']

    # Fit a Markov Switching Model
    st.subheader("Markov Switching Model Analysis")
    st.write("We use a Markov Switching Model to identify different regimes in the yield spread data:")

    with st.expander("Markov Switching Model Methodology", expanded=False):
        st.latex(r'''
        y_t = \mu_{s_t} + \epsilon_t \\
        \epsilon_t \sim N(0, \sigma_{s_t}^2)
        ''')

    # Two regimes, each with its own constant term
    mod = sm.tsa.MarkovRegression(yield_data_imputed['10Y-2Y Spread'], k_regimes=2, trend='c')
    res = mod.fit()

    # The frame's index was reset to row positions earlier in the script and
    # the dates moved to the 'DATE' column; plot against the dates so the
    # axis titled 'Date' really shows dates. Fall back to the index if the
    # column is not present.
    if 'DATE' in yield_data_imputed.columns:
        plot_dates = yield_data_imputed['DATE']
    else:
        plot_dates = yield_data_imputed.index

    # Plot the spread with the identified regimes using Plotly
    fig6 = go.Figure()
    fig6.add_trace(go.Scatter(x=plot_dates, y=yield_data_imputed['10Y-2Y Spread'], mode='lines', name='10Y-2Y Spread', line=dict(color='blue')))
    regime_traces = (
        (0, 'Regime 1 Probability', dict(dash='dash', color='red')),
        (1, 'Regime 2 Probability', dict(dash='dot', color='green')),
    )
    for regime, trace_name, line_style in regime_traces:
        fig6.add_trace(go.Scatter(x=plot_dates, y=res.filtered_marginal_probabilities[regime], mode='lines', name=trace_name, line=line_style))

    fig6.update_layout(
        title='10Y-2Y Spread and Regime Probabilities',
        xaxis_title='Date',
        yaxis_title='10Y-2Y Spread / Regime Probability',
        legend_title_text='Legend'
    )
    st.plotly_chart(fig6)

    # Analyze regime characteristics
    regime_durations = res.expected_durations
    st.write(f'Expected Duration of Regime 1: {regime_durations[0]:.2f} days')
    st.write(f'Expected Duration of Regime 2: {regime_durations[1]:.2f} days')

    # Dynamic interpretation of the spread and regime probabilities.
    # The spread column is a difference of percent yields, i.e. percentage points.
    spread_mean = yield_data_imputed['10Y-2Y Spread'].mean()
    spread_std = yield_data_imputed['10Y-2Y Spread'].std()

    with st.expander("Dynamic Interpretation of the 10Y-2Y Spread and Regime Probabilities", expanded=False):
        # 1 percentage point = 100 basis points; convert for display so the
        # figures match the "basis points" unit named in the sentence.
        st.write(f"The mean of the 10Y-2Y spread is {spread_mean * 100:.2f} basis points with a standard deviation of {spread_std * 100:.2f} basis points.")

        # Regime 1 interpretation: dominant when its average smoothed probability exceeds 0.5
        regime1_mean = res.smoothed_marginal_probabilities[0].mean()
        if regime1_mean > 0.5:
            st.write(f"Regime 1 is the dominant regime with an average probability of {regime1_mean:.2f}.")
            st.write("Practical Insight: Regime 1 may represent periods of economic stability or growth, with the 10Y-2Y spread typically positive, indicating a normal yield curve.")
        else:
            st.write(f"Regime 1 has an average probability of {regime1_mean:.2f}. It is not the dominant regime.")
            st.write("Practical Insight: Regime 1 may represent transitional periods or times of uncertainty in the economic cycle.")

        # Regime 2 interpretation, same rule
        regime2_mean = res.smoothed_marginal_probabilities[1].mean()
        if regime2_mean > 0.5:
            st.write(f"Regime 2 is the dominant regime with an average probability of {regime2_mean:.2f}.")
            st.write("Practical Insight: Regime 2 may represent periods of economic stress or recession, with the 10Y-2Y spread often negative, indicating an inverted yield curve.")
        else:
            st.write(f"Regime 2 has an average probability of {regime2_mean:.2f}. It is not the dominant regime.")
            st.write("Practical Insight: Regime 2 may represent transitional periods or times of uncertainty in the economic cycle.")

        st.write("\nExpected Duration of Regimes:")
        st.write(f"Regime 1: {regime_durations[0]:.2f} days")
        st.write(f"Regime 2: {regime_durations[1]:.2f} days")

        # NOTE(review): the wording below presumes regime 1 is the calm regime
        # and regime 2 the stressed one; the fitted model does not guarantee
        # that ordering - confirm against the estimated regime means.
        if regime_durations[0] > regime_durations[1]:
            st.write("Regime 1 tends to last longer, indicating longer periods of economic stability or growth.")
        else:
            st.write("Regime 2 tends to last longer, indicating longer periods of economic stress or recession.")

    # PCA Analysis
    st.subheader("Principal Component Analysis (PCA)")

    # Rebuild the imputed frame from the raw download: the previous
    # `yield_data_imputed` has since gained extra columns and lost its date index.
    imputer = SimpleImputer(strategy='mean')
    yield_data_imputed = pd.DataFrame(imputer.fit_transform(yield_data), columns=yield_data.columns, index=yield_data.index)

    # Ensure there are no remaining NaN values
    yield_data_imputed.dropna(inplace=True)

    # Standardize the data. A column that never changes has zero standard
    # deviation; divide it by 1 instead so it becomes all zeros rather than
    # NaN, which PCA would reject.
    column_std = yield_data_imputed.std().replace(0, 1.0)
    yield_data_standardized = (yield_data_imputed - yield_data_imputed.mean()) / column_std

    # Apply PCA
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(yield_data_standardized)

    # Create a DataFrame for the principal components
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'], index=yield_data_imputed.index)

    # Get the loadings (coefficients) of the original variables on the principal components
    loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=yield_data_imputed.columns)

    # The sign of a principal component is arbitrary, but the narrative shown
    # for these scores reads "PC1 up" as rates rising and "PC2 up" as the curve
    # steepening. Orient scores and loadings accordingly:
    #   PC1 - loadings sum to a positive number (yields up -> PC1 up)
    #   PC2 - the longest maturity loads higher than the shortest one
    # Only `pca_df` and `loadings` are flipped; `pca.components_` and
    # `principal_components` keep the orientation returned by scikit-learn.
    if loadings['PC1'].sum() < 0:
        loadings['PC1'] = -loadings['PC1']
        pca_df['PC1'] = -pca_df['PC1']
    if loadings.loc[tickers[-1], 'PC2'] < loadings.loc[tickers[0], 'PC2']:
        loadings['PC2'] = -loadings['PC2']
        pca_df['PC2'] = -pca_df['PC2']

    # Explained variance (fraction of total variance per component)
    explained_variance = pca.explained_variance_ratio_

    # Plot the principal components over time using Plotly
    fig7 = go.Figure()
    fig7.add_trace(go.Scatter(x=pca_df.index, y=pca_df['PC1'], mode='lines', name='PC1'))
    fig7.add_trace(go.Scatter(x=pca_df.index, y=pca_df['PC2'], mode='lines', name='PC2'))

    fig7.update_layout(
        title='Principal Components Over Time',
        xaxis_title='Date',
        yaxis_title='Principal Component Value',
        legend_title_text='Principal Component'
    )
    st.plotly_chart(fig7)

    # Enhanced Interpretation
    def interpret_pca(pc1, pc2, loadings, explained_variance):
        """Write a textual reading of the first two principal components to the page.

        Args:
            pc1: Series of PC1 scores over time.
            pc2: Series of PC2 scores over time.
            loadings: DataFrame with 'PC1' and 'PC2' columns holding the
                loading of each original series on the component.
            explained_variance: Indexable whose items 0 and 1 are the variance
                shares of PC1 and PC2.
        """
        # Loadings of each component, largest value first
        st.write("\nPrincipal Components Interpretation:")
        for position, component in enumerate(('PC1', 'PC2')):
            lead = "" if position == 0 else "\n"
            st.write(f"{lead}{component} explains {explained_variance[position]:.2f} of the variance and is mainly driven by these maturities:")
            st.write(loadings[component].sort_values(ascending=False))

        # Period-to-period changes of the scores drive everything below.
        pc1_steps = pc1.diff().dropna()
        pc2_steps = pc2.diff().dropna()

        st.write("\nDynamic Interpretation of Principal Component Scores:")

        # PC1: direction of the average change
        st.write("\nPC1 Analysis:")
        if not pc1_steps.mean() > 0:
            st.write("On average, PC1 has been decreasing over time, indicating a general fall in interest rates.")
            st.write("Action: Consider increasing bond holdings to benefit from rising bond prices or locking in higher yields.")
        else:
            st.write("On average, PC1 has been increasing over time, indicating a general rise in interest rates.")
            st.write("Action: Consider reducing bond holdings or shifting to shorter-duration bonds to minimize interest rate risk.")

        # PC2: direction of the average change
        st.write("\nPC2 Analysis:")
        if not pc2_steps.mean() > 0:
            st.write("On average, PC2 has been decreasing over time, indicating a flattening or inverting yield curve.")
            st.write("Action: Consider shifting towards safer assets or short-term bonds to avoid potential losses from long-term bonds and prepare for potential economic downturns.")
        else:
            st.write("On average, PC2 has been increasing over time, indicating a steepening yield curve.")
            st.write("Action: Consider investing in long-term bonds to take advantage of higher future yields or growth-oriented investments.")

        # Volatility: standard deviation of the changes
        st.write("\nVolatility Analysis:")
        pc1_volatility, pc2_volatility = pc1_steps.std(), pc2_steps.std()
        st.write(f"PC1 Volatility: {pc1_volatility:.2f}")
        st.write(f"PC2 Volatility: {pc2_volatility:.2f}")

        volatility_message = (
            "PC1 is more volatile than PC2, indicating greater fluctuations in the overall level of interest rates compared to the yield curve slope."
            if pc1_volatility > pc2_volatility
            else "PC2 is more volatile than PC1, indicating greater fluctuations in the yield curve slope compared to the overall level of interest rates."
        )
        st.write(volatility_message)

    # Render the interpretation inside a collapsed expander
    with st.expander("Principal Components Interpretation", expanded=False):
        interpret_pca(
            pc1=pca_df['PC1'],
            pc2=pca_df['PC2'],
            loadings=loadings,
            explained_variance=explained_variance,
        )

# CSS that sets the page's #MainMenu element and footer to hidden,
# injected as raw HTML.
hide_streamlit_style = "\n".join([
    "",
    "<style>",
    "#MainMenu {visibility: hidden;}",
    "footer {visibility: hidden;}",
    "</style>",
    "",
])
st.markdown(hide_streamlit_style, unsafe_allow_html=True)