# Streamlit app: descriptive statistics for stock / crypto price series
# downloaded from Yahoo Finance.
#
# NOTE: comments are used instead of a module docstring so that Streamlit's
# "magic" rendering of bare top-level expressions cannot echo it into the page.

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
from plotly.subplots import make_subplots
from scipy import stats
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import acf, pacf

# Display order for the calendar-effect boxplots.
_DAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
_MONTH_ORDER = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                'August', 'September', 'October', 'November', 'December']


# Helper function to fetch stock or crypto data
def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch stock or crypto data from Yahoo Finance.

    Args:
        ticker: Yahoo Finance symbol, e.g. ``AAPL`` or ``BTC-USD``.
        start_date: First date to download (forwarded to ``yf.download``).
        end_date: End date of the download (forwarded to ``yf.download``).

    Returns:
        The downloaded frame with single-level columns and rows containing
        NaN removed. Empty when nothing usable was returned.
    """
    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
    # The download may come back with MultiIndex columns; keep only the
    # first level so columns can be addressed as data['Close'] etc.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
    return data.dropna()


# Descriptive statistics methods
def plot_candlestick(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Plot candlestick chart.

    Args:
        data: Frame with ``Open``, ``High``, ``Low`` and ``Close`` columns.
        ticker: Symbol used in the chart title.

    Returns:
        A figure holding a single candlestick trace.
    """
    candles = go.Candlestick(
        x=data.index,
        open=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
    )
    fig = go.Figure(data=[candles])
    fig.update_layout(
        title=f"{ticker} Candlestick Chart",
        xaxis_title="Date",
        yaxis_title="Price",
        hovermode="x",
        hoverlabel=dict(bgcolor="white", font_size=12),
        height=800,
    )
    return fig


def plot_returns_distribution(data: pd.DataFrame, ticker: str, return_periods: dict) -> go.Figure:
    """Plot returns distribution.

    One subplot row is drawn per entry of ``return_periods``; the left panel
    holds the negative returns and the right panel the positive returns.

    Args:
        data: Frame with a ``Close`` column and a datetime index.
        ticker: Symbol used in the titles.
        return_periods: Mapping of display label (e.g. ``"Weekly"``) to a
            pandas resampling frequency string (e.g. ``"W"``).

    Returns:
        A figure with ``len(return_periods)`` rows and two columns.
    """
    closes = data['Close']
    # Empty resample buckets (e.g. weekends on a daily grid) are dropped
    # before pct_change so each return compares consecutive observed prices
    # without relying on pandas' implicit forward-fill.
    returns = {
        label: closes.resample(freq).last().dropna().pct_change().dropna()
        for label, freq in return_periods.items()
    }

    # make_subplots assigns titles row by row, so titles are interleaved
    # (down, up) per period to line up with the panels filled below.
    titles = [
        f'{ticker} {label} {side} Days'
        for label in returns
        for side in ('Down', 'Up')
    ]
    fig = make_subplots(rows=max(len(returns), 1), cols=2, subplot_titles=titles)

    for row, series in enumerate(returns.values(), start=1):
        panels = (
            (series[series < 0], 'Down days', 'red'),
            (series[series > 0], 'Up days', 'blue'),
        )
        for col, (subset, label, color) in enumerate(panels, start=1):
            median = subset.median() if not subset.empty else 0
            fig.add_trace(
                go.Histogram(
                    x=subset,
                    nbinsx=35,
                    name=f'{label}',
                    histnorm='probability',
                    opacity=0.5,
                    marker_color=color,
                    showlegend=False,
                ),
                row=row,
                col=col,
            )
            # Returns are fractions, so the median is shown as a percentage
            # to match the axis tick format.
            fig.add_vline(
                x=median,
                line=dict(color='green', dash='dash'),
                annotation_text=f'Median: {median:.2%}',
                row=row,
                col=col,
            )
            fig.update_xaxes(tickformat=".2%", tickangle=90, row=row, col=col)
            fig.update_yaxes(title_text='Proportion', row=row, col=col)

    fig.update_layout(title_text=f'{ticker} Returns Distribution')
    return fig


def _add_probability_panel(fig: go.Figure, sample: np.ndarray, dist, marker_name: str, col: int) -> None:
    """Add a probability plot of ``sample`` against ``dist`` plus its least-squares line."""
    theoretical, ordered = stats.probplot(sample, dist=dist, fit=False)
    fig.add_trace(
        go.Scatter(x=theoretical, y=ordered, mode='markers', name=marker_name, showlegend=False),
        row=1,
        col=col,
    )
    # A regression needs at least two points; otherwise fall back to y = x.
    if len(theoretical) > 1:
        fit = stats.linregress(theoretical, ordered)
        slope, intercept = fit.slope, fit.intercept
    else:
        slope, intercept = 1, 0
    fig.add_trace(
        go.Scatter(
            x=theoretical,
            y=slope * np.asarray(theoretical) + intercept,
            mode='lines',
            name='Best-fit line',
            showlegend=False,
        ),
        row=1,
        col=col,
    )


def plot_probability_plots(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Plot probability plots.

    Daily returns of ``Adj Close`` are compared with a normal distribution
    and with a Student's t-distribution fitted to the same returns.

    Args:
        data: Frame with an ``Adj Close`` column. It is not modified.
        ticker: Symbol used in the titles.

    Returns:
        A figure with the normal plot on the left and the t plot on the right.
    """
    daily_returns = data["Adj Close"].pct_change().to_numpy()
    # Remove non-finite values (the leading NaN and any division artefacts)
    daily_returns = daily_returns[np.isfinite(daily_returns)]

    # Fit the data to a Student's t-distribution; default fallback for tiny samples
    if len(daily_returns) > 2:
        degree_of_freedom, loc, scale = stats.t.fit(daily_returns)
    else:
        degree_of_freedom, loc, scale = 3, 0, 1

    fig = make_subplots(rows=1, cols=2, subplot_titles=[
        f'{ticker} Daily Returns - Normal Probability Plot',
        f'{ticker} Daily Returns - Student\'s t-distribution Probability Plot'])

    _add_probability_panel(fig, daily_returns, "norm", 'Normal Q-Q', col=1)
    _add_probability_panel(fig, daily_returns, stats.t(degree_of_freedom, loc, scale), 'T-dist Q-Q', col=2)

    fig.update_layout(height=600, width=1200, title_text=f'{ticker} Probability Plots')
    return fig


def plot_time_series_analysis(data: pd.DataFrame, ticker: str, period: int) -> go.Figure:
    """Plot time series analysis.

    Args:
        data: Frame with a ``Close`` column.
        ticker: Symbol used in the title.
        period: Seasonal period passed to the STL decomposition.

    Returns:
        A four-row figure: original series, trend, seasonal and residual.
    """
    price_data = data["Close"]
    decomposition = STL(price_data, period=period).fit()

    panels = [
        (price_data, 'Original', None),
        (decomposition.trend, 'Trend', 'orange'),
        (decomposition.seasonal, 'Seasonal', 'green'),
        (decomposition.resid, 'Residual', 'red'),
    ]
    fig = make_subplots(rows=4, cols=1, subplot_titles=[
        'Original Time Series', 'Trend Component', 'Seasonal Component', 'Residual Component'])
    for row, (series, name, color) in enumerate(panels, start=1):
        # The original series keeps plotly's default colour.
        style = {} if color is None else {'line': dict(color=color)}
        fig.add_trace(
            go.Scatter(x=series.index, y=series, mode='lines', name=name, **style),
            row=row,
            col=1,
        )
    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Time Series Analysis')
    return fig


def plot_acf_pacf(data: pd.DataFrame, ticker: str, nlags: int) -> go.Figure:
    """Plot ACF and PACF.

    Args:
        data: Frame with an ``Adj Close`` column. It is not modified.
        ticker: Symbol used in the title.
        nlags: Number of lags to show (lag 0 is excluded).

    Returns:
        A figure with ACF bars on the left and PACF bars on the right. A panel
        is all zeros when the sample is too short for the requested lags.
    """
    nlags = int(nlags)
    daily_returns = data["Adj Close"].pct_change().to_numpy()
    # Remove non-finite values
    daily_returns = daily_returns[np.isfinite(daily_returns)]
    n_obs = len(daily_returns)

    # Exclude lag 0, fallback to zeros
    if n_obs > nlags:
        acf_vals = acf(daily_returns, nlags=nlags)[1:]
    else:
        acf_vals = np.zeros(nlags)

    # statsmodels' pacf only supports lags up to half the sample size, so the
    # guard is stricter than for the ACF.
    if nlags < n_obs // 2:
        pacf_vals = pacf(daily_returns, nlags=nlags)[1:]
    else:
        pacf_vals = np.zeros(nlags)

    fig = make_subplots(rows=1, cols=2, subplot_titles=[
        'Autocorrelation Function (ACF)', 'Partial Autocorrelation Function (PACF)'])
    fig.add_trace(go.Bar(x=np.arange(1, len(acf_vals) + 1), y=acf_vals, name='ACF'), row=1, col=1)
    fig.add_trace(go.Bar(x=np.arange(1, len(pacf_vals) + 1), y=pacf_vals, name='PACF'), row=1, col=2)
    fig.update_layout(height=600, width=1200, title_text=f'{ticker} ACF and PACF')
    return fig


def plot_boxplots(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Plot boxplots.

    Args:
        data: Frame with a ``Close`` column and a datetime index. It is not
            modified.
        ticker: Symbol used in the title.

    Returns:
        A three-row figure of returns grouped by weekday, month and year.
    """
    # Work on a separate frame so the caller's data keeps its original columns.
    frame = pd.DataFrame(
        {
            'Return': data['Close'].pct_change(),
            'Day of Week': data.index.day_name(),
            'Month': data.index.month_name(),
            'Year': data.index.year,
        },
        index=data.index,
    )

    fig = make_subplots(rows=3, cols=1, subplot_titles=[
        'Day of the Week Effect', 'Month of the Year Effect', 'Year Effect'])
    day_fig = px.box(frame, x='Day of Week', y='Return', category_orders={'Day of Week': _DAY_ORDER})
    month_fig = px.box(frame, x='Month', y='Return', category_orders={'Month': _MONTH_ORDER})
    year_fig = px.box(frame, x='Year', y='Return')
    fig.add_trace(day_fig.data[0], row=1, col=1)
    fig.add_trace(month_fig.data[0], row=2, col=1)
    fig.add_trace(year_fig.data[0], row=3, col=1)

    # Only the traces are copied from the px figures, so the category order
    # has to be set again on the subplot axes.
    fig.update_xaxes(categoryorder='array', categoryarray=_DAY_ORDER, row=1, col=1)
    fig.update_xaxes(categoryorder='array', categoryarray=_MONTH_ORDER, row=2, col=1)

    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Boxplots')
    return fig


def plot_rolling_statistics(data: pd.DataFrame, ticker: str, window_size: int, quantile_value: float) -> go.Figure:
    """Plot rolling statistics.

    Args:
        data: Frame with a ``Close`` column. It is not modified.
        ticker: Symbol used in the title.
        window_size: Number of observations in each rolling window.
        quantile_value: Quantile in ``[0, 1]`` for the rolling quantile.

    Returns:
        A five-row figure: close price, rolling kurtosis, skewness, quantile
        and lag-1 autocorrelation of the close price.
    """
    close = data['Close']
    window = close.rolling(window_size)

    # Each statistic yields NaN when the window holds too few points.
    kurtosis = window.apply(lambda x: stats.kurtosis(x.dropna()) if len(x.dropna()) > 3 else np.nan)
    skewness = window.apply(lambda x: stats.skew(x.dropna()) if len(x.dropna()) > 2 else np.nan)
    quantile = window.quantile(quantile_value)
    autocorrelation = window.apply(lambda x: x.autocorr() if len(x.dropna()) > 1 else np.nan)

    panels = [
        (close, 'Close', None),
        (kurtosis, 'Rolling Kurtosis', 'orange'),
        (skewness, 'Rolling Skewness', 'green'),
        (quantile, 'Rolling Quantile', 'red'),
        (autocorrelation, 'Rolling Autocorrelation', 'purple'),
    ]
    fig = make_subplots(rows=5, cols=1, subplot_titles=[
        'Close Price', 'Rolling Kurtosis', 'Rolling Skewness', 'Rolling Quantile', 'Rolling Autocorrelation'])
    for row, (series, name, color) in enumerate(panels, start=1):
        # The close price keeps plotly's default colour.
        style = {} if color is None else {'line': dict(color=color)}
        fig.add_trace(
            go.Scatter(x=data.index, y=series, mode='lines', name=name, **style),
            row=row,
            col=1,
        )
    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Rolling Statistics')
    return fig


# Streamlit app
st.set_page_config(page_title="Asset Price Descriptive Statistics", layout="wide")
st.title('Asset Price Descriptive Statistics')
st.sidebar.title('Input Parameters')

# Sidebar for "How to Use" instructions
with st.sidebar.expander("How to Use", expanded=False):
    st.write("""
    ### How to Use:
    1. Enter the stock or crypto ticker, start date, and end date.
    2. Choose the desired analysis method from the available options.
    3. Adjust the parameters specific to the selected method if needed.
    4. Click 'Fetch Data' to load the data and run the analysis.
    5. The results and visualizations will appear in the main section of the app.
    """)

# Expander for Ticker and Date Input
with st.sidebar.expander("Ticker and Date Input", expanded=True):
    ticker = st.text_input(
        'Enter Stock or Crypto Ticker (e.g., AAPL or BTC-USD)',
        'ASML.AS',
        help="Enter the symbol for the stock or cryptocurrency you want to analyze.",
    )
    start_date = st.date_input(
        'Start Date',
        pd.to_datetime('2000-01-01'),
        help="Select the start date for data fetching.",
    )
    end_date = st.date_input(
        'End Date',
        pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)),
        help="Select the end date for data fetching.",
    )

# Expander for Method Selection
with st.sidebar.expander("Method Selection", expanded=True):
    selected = st.radio(
        "Select Method",
        [
            "Candlestick Chart",
            "Returns Distribution",
            "Probability Plots",
            "Time Series Analysis",
            "ACF and PACF",
            "Boxplots",
            "Rolling Statistics",
        ],
    )

# Expander for Method-Specific Parameters
with st.sidebar.expander("Parameters", expanded=True):
    if selected == "Returns Distribution":
        return_periods = {
            "Daily": st.text_input('Daily Resampling Frequency', 'D',
                                   help="Set the frequency for daily returns calculation."),
            "Weekly": st.text_input('Weekly Resampling Frequency', 'W',
                                    help="Set the frequency for weekly returns calculation."),
            "Monthly": st.text_input('Monthly Resampling Frequency', 'M',
                                     help="Set the frequency for monthly returns calculation."),
        }
    elif selected == "Time Series Analysis":
        # STL needs a period of at least 2, so smaller values are not offered.
        period = st.number_input(
            'STL Decomposition Period', min_value=2, value=252,
            help="Enter the period for STL decomposition (e.g., 252 for yearly seasonality).")
    elif selected == "ACF and PACF":
        nlags = st.number_input(
            'Number of Lags', min_value=1, value=100,
            help="Enter the number of lags for ACF and PACF plots.")
    elif selected == "Rolling Statistics":
        window_size = st.number_input(
            'Rolling Window Size', min_value=1, value=20,
            help="Enter the window size for rolling calculations.")
        quantile_value = st.number_input(
            'Quantile Value', min_value=0.0, max_value=1.0, value=0.5,
            help="Set the quantile value for rolling quantile calculation.")

# Fetch data
# The button is created unconditionally: folding it into an `or` after the
# session-state check would short-circuit and never render it while no data
# has been loaded yet.
fetch_clicked = st.sidebar.button('Fetch Data')
if fetch_clicked or 'data' not in st.session_state:
    data = fetch_stock_data(ticker, start_date, end_date)
    if data.empty:
        st.error(f"No data returned for {ticker} from {start_date} to {end_date}")
    else:
        st.session_state.data = data
        # Remember which symbol the cached data belongs to, so titles stay
        # correct if the text input is edited without fetching again.
        st.session_state.data_ticker = ticker

if 'data' in st.session_state and not st.session_state.data.empty:
    data = st.session_state.data
    ticker = st.session_state.get('data_ticker', ticker)

    # Display results based on the selected method
    if selected == "Candlestick Chart":
        st.markdown("""
        ### Candlestick Chart
        The candlestick chart visually represents the open, high, low, and close prices of a stock or crypto for each day.
        """)
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            **Components of a Candlestick:**
            - **Open**: The price at which a stock or crypto started trading at the beginning of the time period.
            - **High**: The highest price reached during the time period.
            - **Low**: The lowest price reached during the time period.
            - **Close**: The price at which a stock or crypto stopped trading at the end of the time period.

            **Candlestick Structure:**
            - **Body**: The area between the open and close prices.
                - If the close is higher than the open, the body is typically green or white, indicating a bullish sentiment.
                - If the close is lower than the open, the body is typically red or black, indicating a bearish sentiment.
            - **Wicks (Shadows)**: The lines extending above and below the body.
                - The upper wick represents the range from the high to the close (or open, if it's higher).
                - The lower wick represents the range from the low to the open (or close, if it's lower).

            **How to use:**
            1. Enter the stock or crypto ticker, start date, and end date.
            2. Click 'Fetch Data' to load the data.
            3. The chart will display the candlestick chart for the selected period.

            **Results:**
            The chart shows the candlestick representation of the stock or crypto's price movements over time, helping to identify trends, reversals, and patterns such as dojis, hammers, and engulfing patterns.
            """)
        fig = plot_candlestick(data, ticker)
        st.plotly_chart(fig)

    elif selected == "Returns Distribution":
        st.markdown("""
        ### Returns Distribution
        This analysis shows the distribution of daily, weekly, and monthly returns. It helps understand the asset return characteristics by visualizing the frequency and magnitude of price changes over different time periods.
        """)
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            **Components:**
            - **Daily Returns**: Calculated as the percentage change in the closing price from one day to the next.
            - **Weekly Returns**: Calculated as the percentage change in the closing price from the last trading day of one week to the last trading day of the next week.
            - **Monthly Returns**: Calculated as the percentage change in the closing price from the last trading day of one month to the last trading day of the next month.
            """)
            st.latex(r'''
            \text{Daily Return} (R_d) = \frac{P_{\text{close}, t} - P_{\text{close}, t-1}}{P_{\text{close}, t-1}}
            ''')
            st.latex(r'''
            \text{Weekly Return} (R_w) = \frac{P_{\text{close}, \text{week}_t} - P_{\text{close}, \text{week}_{t-1}}}{P_{\text{close}, \text{week}_{t-1}}}
            ''')
            st.latex(r'''
            \text{Monthly Return} (R_m) = \frac{P_{\text{close}, \text{month}_t} - P_{\text{close}, \text{month}_{t-1}}}{P_{\text{close}, \text{month}_{t-1}}}
            ''')
            st.markdown("""
            Where:
            - `P_close_t` is the closing price at time `t`.
            - `P_close_(t-1)` is the closing price at time `t-1`.
            - `P_close_week_t` is the closing price at the end of the current week.
            - `P_close_week_(t-1)` is the closing price at the end of the previous week.
            - `P_close_month_t` is the closing price at the end of the current month.
            - `P_close_month_(t-1)` is the closing price at the end of the previous month
            """)
        fig = plot_returns_distribution(data, ticker, return_periods)
        st.plotly_chart(fig)

    elif selected == "Probability Plots":
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            ### Probability Plots
            This analysis shows the normal and t-distribution probability plots of daily returns. It helps check if the returns follow a specific distribution.
            """)
            st.markdown("""
            **Components:**
            - **Normal Probability Plot**: Plots the quantiles of the daily returns against the theoretical quantiles of a normal distribution.
            - **T-Distribution Probability Plot**: Plots the quantiles of the daily returns against the theoretical quantiles of a t-distribution with estimated parameters.

            **Formulas:**
            - **Daily Return** (R_d):
            """)
            st.latex(r'''
            R_d = \frac{P_{close, t} - P_{close, t-1}}{P_{close, t-1}}
            ''')
            st.markdown("""
            where:
            - `P_close_t` is the closing price at time `t`.
            - `P_close_(t-1)` is the closing price at time `t-1`.

            - **Normal Probability Plot**: Compares the ordered sample values of daily returns to the expected values if the data followed a normal distribution.
            - **T-Distribution Probability Plot**: Compares the ordered sample values of daily returns to the expected values if the data followed a t-distribution. The t-distribution is parameterized by degrees of freedom (ν), location parameter (μ), and scale parameter (σ):
            """)
            # The location-scale density carries a 1/sigma normalising factor.
            st.latex(r'''
            t(x, \nu, \mu, \sigma) = \frac{\Gamma\left(\frac{\nu + 1}{2}\right)}{\sigma \sqrt{\nu \pi} \, \Gamma\left(\frac{\nu}{2}\right)} \left(1 + \frac{(x - \mu)^2}{\nu \sigma^2}\right)^{-\frac{\nu + 1}{2}}
            ''')
            st.markdown("""
            where:
            - `Γ` is the gamma function.
            - `ν` is the degrees of freedom.
            - `μ` is the location parameter.
            - `σ` is the scale parameter.
            """)
        fig = plot_probability_plots(data, ticker)
        st.plotly_chart(fig)

    elif selected == "Time Series Analysis":
        st.markdown("""
        ### Time Series Analysis
        This analysis decomposes the stock or crypto price into trend, seasonal, and residual components.
        """)
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            **Components:**
            - **Trend** (T_t): Represents the long-term movement in the time series.
            - **Seasonal** (S_t): Captures the repeating short-term cycle in the data.
            - **Residual** (R_t): The remaining component after removing the trend and seasonal effects, representing the noise or irregular component.

            **Formulas:**
            The time series (Y_t) can be decomposed as:
            """)
            st.latex(r'''
            Y_t = T_t + S_t + R_t
            ''')
            st.markdown("""
            where:
            - `Y_t` is the observed value at time `t`.
            - `T_t` is the trend component.
            - `S_t` is the seasonal component.
            - `R_t` is the residual component.

            The Seasonal-Trend decomposition using LOESS is used for this decomposition:
            - **LOESS** (Locally Estimated Scatterplot Smoothing): A non-parametric method that fits multiple regressions in local neighborhoods.
            """)
        fig = plot_time_series_analysis(data, ticker, period)
        st.plotly_chart(fig)

    elif selected == "ACF and PACF":
        st.markdown("""
        ### ACF and PACF
        This analysis shows the autocorrelation and partial autocorrelation functions of the stock or crypto's daily returns. It helps identify the presence of patterns or trends.
        """)
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            **Autocorrelation Function (ACF):**
            The ACF measures the correlation between a time series and its lagged values. It is defined as:
            """)
            st.latex(r'''
            \rho_k = \frac{\sum_{t=k+1}^{n} (Y_t - \bar{Y})(Y_{t-k} - \bar{Y})}{\sum_{t=1}^{n} (Y_t - \bar{Y})^2}
            ''')
            st.markdown("""
            where:
            - `ρ_k` is the autocorrelation at lag `k`.
            - `Y_t` is the value at time `t`.
            - `Ȳ` is the mean of the series.

            **Partial Autocorrelation Function (PACF):**
            The PACF measures the correlation between a time series and its lagged values, controlling for the values of the intermediate lags. It is defined as the coefficient `φ_kk` in the linear regression:
            """)
            st.latex(r'''
            Y_t = \phi_{k1} Y_{t-1} + \phi_{k2} Y_{t-2} + \cdots + \phi_{kk} Y_{t-k} + \epsilon_t
            ''')
            st.markdown("""
            where:
            - `φ_kk` is the partial autocorrelation at lag `k`.
            - `ε_t` is the white noise error term.
            """)
        fig = plot_acf_pacf(data, ticker, nlags)
        st.plotly_chart(fig)

    elif selected == "Boxplots":
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            ### Boxplots
            This analysis shows the effect of the day of the week, month of the year, and year on the stock or crypto's returns. It helps identify patterns based on time periods.

            **How it Works:**
            - **Day of the Week Effect:** The boxplot groups returns by each day of the week (e.g., Monday, Tuesday) to identify any patterns or anomalies specific to particular days.
            - **Month of the Year Effect:** The boxplot groups returns by each month to highlight any seasonal effects in the stock or crypto returns.
            - **Year Effect:** The boxplot groups returns by year to observe any long-term trends or changes in performance over the years.
            """)
        fig = plot_boxplots(data, ticker)
        st.plotly_chart(fig)

    elif selected == "Rolling Statistics":
        with st.expander("Method Description", expanded=False):
            st.markdown("""
            ### Rolling Statistics
            This analysis shows the rolling kurtosis, skewness, quantile, and autocorrelation of the stock or crypto's price. It helps understand the dynamic changes in the characteristics.

            **How it Works:**
            - **Rolling Kurtosis:** Kurtosis measures the tails' heaviness of the distribution. It indicates the presence of outliers.
            """)
            st.latex(r'''
            \text{Kurtosis}(X) = \frac{E[(X - \mu)^4]}{\sigma^4}
            ''')
            st.markdown("""
            - **Rolling Skewness:** Skewness measures the asymmetry of the distribution of returns.
            """)
            st.latex(r'''
            \text{Skewness}(X) = \frac{E[(X - \mu)^3]}{\sigma^3}
            ''')
            st.markdown("""
            - **Rolling Quantile:** Quantile indicates the value below which a given percentage of observations fall. For the 50th percentile (median):
            """)
            st.latex(r'''
            Q_p(X) = \inf \{ x \in \mathbb{R} : F_X(x) \geq p \}
            ''')
            st.markdown("""
            - **Rolling Autocorrelation:** Autocorrelation measures the correlation of the series with its lagged values.
            """)
            st.latex(r'''
            \text{Autocorrelation}(k) = \frac{E[(X_t - \mu)(X_{t-k} - \mu)]}{\sigma^2}
            ''')
        fig = plot_rolling_statistics(data, ticker, window_size, quantile_value)
        st.plotly_chart(fig)

# Hide Streamlit's default footer and menu
# NOTE(review): the style string below is whitespace-only in this copy of the
# file, so nothing is actually hidden — confirm whether CSS was lost upstream.
hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)