# Space10 / app.py
# QuantumLearner's picture
# Update app.py
# 39e33da verified
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from scipy import stats
import streamlit as st
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import acf, pacf
# Helper function to fetch stock or crypto data
def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Download price history for ``ticker`` from Yahoo Finance.

    Args:
        ticker: Symbol passed straight to ``yf.download``.
        start_date: Start of the requested range (forwarded as ``start``).
        end_date: End of the requested range (forwarded as ``end``).

    Returns:
        The downloaded frame with single-level column names and every row
        containing a missing value removed. It may be empty.
    """
    frame = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

    # Keep only the first level of the column labels when a MultiIndex comes back.
    columns = frame.columns
    if isinstance(columns, pd.MultiIndex):
        frame.columns = columns.get_level_values(0)

    cleaned = frame.dropna()
    return cleaned
# Descriptive statistics methods
def plot_candlestick(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Build a candlestick figure from the Open/High/Low/Close columns.

    Args:
        data: Frame whose index supplies the x-axis and which contains the
            'Open', 'High', 'Low' and 'Close' columns.
        ticker: Symbol shown in the chart title.

    Returns:
        A figure holding a single candlestick trace.
    """
    candles = go.Candlestick(
        x=data.index,
        open=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
    )
    fig = go.Figure(data=[candles])

    layout_options = dict(
        title=f"{ticker} Candlestick Chart",
        xaxis_title="Date",
        yaxis_title="Price",
        hovermode="x",
        hoverlabel=dict(bgcolor="white", font_size=12),
        height=800,
    )
    fig.update_layout(**layout_options)
    return fig
def plot_returns_distribution(data: pd.DataFrame, ticker: str, return_periods: dict) -> go.Figure:
    """Plot histograms of negative and positive returns for each period.

    For every ``label -> resampling frequency`` entry in ``return_periods``
    the 'Close' column is resampled (last value per bin) and converted to
    percentage changes. Each period gets one row of the figure: negative
    returns in the left column, positive returns in the right column, each
    with a dashed line at the subset's median.

    Args:
        data: Frame with a datetime index and a 'Close' column.
        ticker: Symbol used in the subplot and figure titles.
        return_periods: Mapping of period label (e.g. "Daily") to a pandas
            resampling frequency string (e.g. "D").

    Returns:
        A figure with ``len(return_periods)`` rows and two columns.
    """
    closes = data['Close']
    # Empty resample bins (e.g. days without a quote) are dropped BEFORE the
    # percentage change, so each return compares two consecutive observed
    # prices and does not depend on how pct_change treats missing values.
    returns = {
        period: closes.resample(freq).last().dropna().pct_change().dropna()
        for period, freq in return_periods.items()
    }

    # (title word, trace name, bar colour) for the left and right column.
    panels = (('Down', 'Down days', 'red'), ('Up', 'Up days', 'blue'))

    # make_subplots assigns titles row by row, so the Down/Up titles of one
    # period must be adjacent to line up with the traces added below.
    subplot_titles = [
        f'{ticker} {period} {side} Days'
        for period in returns
        for side, _, _ in panels
    ]
    fig = make_subplots(rows=max(len(returns), 1), cols=2, subplot_titles=subplot_titles)

    for row, return_data in enumerate(returns.values(), start=1):
        subsets = (return_data[return_data < 0], return_data[return_data > 0])
        for col, (subset, (_, label, color)) in enumerate(zip(subsets, panels), start=1):
            # An empty subset has no median; draw the marker at 0 instead.
            median = subset.median() if not subset.empty else 0
            fig.add_trace(
                go.Histogram(
                    x=subset,
                    nbinsx=35,
                    name=label,
                    histnorm='probability',
                    opacity=0.5,
                    marker_color=color,
                    showlegend=False,
                ),
                row=row,
                col=col,
            )
            # Annotation uses the same percent format as the x-axis ticks.
            fig.add_vline(
                x=median,
                line=dict(color='green', dash='dash'),
                annotation_text=f'Median: {median:.2%}',
                row=row,
                col=col,
            )
            fig.update_xaxes(tickformat=".2%", tickangle=90, row=row, col=col)
            fig.update_yaxes(title_text='Proportion', row=row, col=col)

    # One title for the whole figure rather than the name of the last period.
    fig.update_layout(title_text=f'{ticker} Returns Distribution')
    return fig
def plot_probability_plots(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Draw normal and Student's t probability plots of daily returns.

    Daily returns are the percentage change of the 'Adj Close' column with
    non-finite values removed. A Student's t distribution is fitted to them
    (falling back to df=3, loc=0, scale=1 when there are at most two
    returns). Each panel shows the ordered returns against the theoretical
    quantiles together with a least-squares line through those points.

    Args:
        data: Frame containing an 'Adj Close' column. It is not modified.
        ticker: Symbol used in the titles.

    Returns:
        A figure with one row and two columns (normal, Student's t).
    """
    # Work on a local array so no "Daily_Return" column is written into the
    # caller's DataFrame.
    daily_returns = data["Adj Close"].pct_change().to_numpy(dtype=float)
    daily_returns = daily_returns[np.isfinite(daily_returns)]

    # Fit the data to a Student's t-distribution (default fallback otherwise).
    if len(daily_returns) > 2:
        degree_of_freedom, loc, scale = stats.t.fit(daily_returns)
    else:
        degree_of_freedom, loc, scale = 3, 0, 1

    fig = make_subplots(rows=1, cols=2, subplot_titles=[
        f'{ticker} Daily Returns - Normal Probability Plot',
        f'{ticker} Daily Returns - Student\'s t-distribution Probability Plot'])

    # (reference distribution, marker trace name) for column 1 and column 2.
    panels = (
        ("norm", 'Normal Q-Q'),
        (stats.t(degree_of_freedom, loc, scale), 'T-dist Q-Q'),
    )
    for col, (dist, marker_name) in enumerate(panels, start=1):
        # fit=False returns just (theoretical quantiles, ordered sample); the
        # regression is done below so it can be skipped for tiny samples.
        theoretical, ordered = stats.probplot(daily_returns, dist=dist, fit=False)

        if len(theoretical) > 1:
            fit = stats.linregress(theoretical, ordered)
            slope, intercept = fit.slope, fit.intercept
        else:
            slope, intercept = 1, 0

        fig.add_trace(
            go.Scatter(x=theoretical, y=ordered, mode='markers', name=marker_name, showlegend=False),
            row=1,
            col=col,
        )
        # This is the fitted regression line, not a fixed 45-degree line.
        fig.add_trace(
            go.Scatter(
                x=theoretical,
                y=slope * np.asarray(theoretical) + intercept,
                mode='lines',
                name='Best-fit line',
                showlegend=False,
            ),
            row=1,
            col=col,
        )

    fig.update_layout(height=600, width=1200, title_text=f'{ticker} Probability Plots')
    return fig
def plot_time_series_analysis(data: pd.DataFrame, ticker: str, period: int) -> go.Figure:
    """Decompose the 'Close' series with STL and plot the four components.

    Args:
        data: Frame containing a 'Close' column.
        ticker: Symbol used in the figure title.
        period: Seasonal period handed to ``STL``.

    Returns:
        A four-row figure: original series, trend, seasonal and residual.
    """
    close = data["Close"]
    result = STL(close, period=period).fit()

    # (subplot title, trace name, series, line colour or None for the default)
    components = [
        ('Original Time Series', 'Original', close, None),
        ('Trend Component', 'Trend', result.trend, 'orange'),
        ('Seasonal Component', 'Seasonal', result.seasonal, 'green'),
        ('Residual Component', 'Residual', result.resid, 'red'),
    ]

    fig = make_subplots(rows=4, cols=1, subplot_titles=[title for title, *_ in components])
    for row, (_, name, series, color) in enumerate(components, start=1):
        trace_kwargs = dict(x=series.index, y=series, mode='lines', name=name)
        if color is not None:
            trace_kwargs['line'] = dict(color=color)
        fig.add_trace(go.Scatter(**trace_kwargs), row=row, col=1)

    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Time Series Analysis')
    return fig
def plot_acf_pacf(data: pd.DataFrame, ticker: str, nlags: int) -> go.Figure:
    """Plot the ACF and PACF of daily returns as bar charts (lag 0 excluded).

    Daily returns are the percentage change of the 'Adj Close' column with
    non-finite values removed.

    Args:
        data: Frame containing an 'Adj Close' column. It is not modified.
        ticker: Symbol used in the figure title.
        nlags: Number of lags requested for both functions.

    Returns:
        A figure with the ACF in column 1 and the PACF in column 2. When
        there are not more returns than ``nlags`` the ACF falls back to
        zeros. The PACF is computed for at most ``len(returns) // 2 - 1``
        lags and falls back to zeros when that leaves no lag at all.
    """
    # Work on a local array so no "Daily_Return" column is written into the
    # caller's DataFrame.
    daily_returns = data["Adj Close"].pct_change().to_numpy(dtype=float)
    daily_returns = daily_returns[np.isfinite(daily_returns)]

    n_obs = len(daily_returns)
    nlags = int(nlags)

    if n_obs > nlags:
        acf_vals = acf(daily_returns, nlags=nlags)[1:]  # Exclude lag 0
    else:
        acf_vals = np.zeros(nlags)

    # pacf rejects lag counts that are too large relative to the sample size
    # (it raises ValueError), so the request is capped below half the sample
    # instead of relying on the weaker "n_obs > nlags" check.
    pacf_lags = min(nlags, n_obs // 2 - 1)
    if pacf_lags >= 1:
        pacf_vals = pacf(daily_returns, nlags=pacf_lags)[1:]  # Exclude lag 0
    else:
        pacf_vals = np.zeros(nlags)

    fig = make_subplots(rows=1, cols=2, subplot_titles=['Autocorrelation Function (ACF)', 'Partial Autocorrelation Function (PACF)'])
    fig.add_trace(go.Bar(x=np.arange(1, len(acf_vals) + 1), y=acf_vals, name='ACF'), row=1, col=1)
    fig.add_trace(go.Bar(x=np.arange(1, len(pacf_vals) + 1), y=pacf_vals, name='PACF'), row=1, col=2)
    fig.update_layout(height=600, width=1200, title_text=f'{ticker} ACF and PACF')
    return fig
def plot_boxplots(data: pd.DataFrame, ticker: str) -> go.Figure:
    """Plot boxplots of 'Close' returns grouped by weekday, month and year.

    Args:
        data: Frame with a datetime index and a 'Close' column. It is not
            modified.
        ticker: Symbol used in the figure title.

    Returns:
        A three-row figure: returns by day of week, by month and by year.
    """
    # Build a separate frame so the helper columns are not added to the
    # caller's DataFrame.
    frame = pd.DataFrame({'Return': data['Close'].pct_change()})
    frame['Day of Week'] = frame.index.day_name()
    frame['Month'] = frame.index.month_name()
    frame['Year'] = frame.index.year

    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                   'August', 'September', 'October', 'November', 'December']

    fig = make_subplots(rows=3, cols=1, subplot_titles=['Day of the Week Effect', 'Month of the Year Effect', 'Year Effect'])
    for row, column in enumerate(('Day of Week', 'Month', 'Year'), start=1):
        box_trace = px.box(frame, x=column, y='Return').data[0]
        fig.add_trace(box_trace, row=row, col=1)

    # Only the trace is copied out of each Plotly Express figure, so a
    # `category_orders` argument given to px.box would stay behind on that
    # figure's layout. The calendar order is therefore set on the subplot axes.
    fig.update_xaxes(categoryorder='array', categoryarray=day_order, row=1, col=1)
    fig.update_xaxes(categoryorder='array', categoryarray=month_order, row=2, col=1)

    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Boxplots')
    return fig
def plot_rolling_statistics(data: pd.DataFrame, ticker: str, window_size: int, quantile_value: float) -> go.Figure:
    """Plot the close price with rolling kurtosis, skewness, quantile and autocorrelation.

    All rolling statistics are computed on the 'Close' column over windows
    of ``window_size`` observations.

    Args:
        data: Frame containing a 'Close' column. It is not modified.
        ticker: Symbol used in the figure title.
        window_size: Number of observations per rolling window.
        quantile_value: Quantile passed to the rolling quantile.

    Returns:
        A five-row figure, one row per series.
    """
    close = data['Close']
    windows = close.rolling(window_size)

    def _kurtosis(window):
        values = window.dropna()
        return stats.kurtosis(values) if len(values) > 3 else np.nan

    def _skewness(window):
        values = window.dropna()
        return stats.skew(values) if len(values) > 2 else np.nan

    def _autocorrelation(window):
        return window.autocorr() if len(window.dropna()) > 1 else np.nan

    # Results are kept in local Series so no "Rolling_*" columns are written
    # into the caller's DataFrame.
    # (subplot title, trace name, series, line colour or None for the default)
    panels = [
        ('Close Price', 'Close', close, None),
        ('Rolling Kurtosis', 'Rolling Kurtosis', windows.apply(_kurtosis), 'orange'),
        ('Rolling Skewness', 'Rolling Skewness', windows.apply(_skewness), 'green'),
        ('Rolling Quantile', 'Rolling Quantile', windows.quantile(quantile_value), 'red'),
        ('Rolling Autocorrelation', 'Rolling Autocorrelation', windows.apply(_autocorrelation), 'purple'),
    ]

    fig = make_subplots(rows=5, cols=1, subplot_titles=[title for title, *_ in panels])
    for row, (_, name, series, color) in enumerate(panels, start=1):
        trace_kwargs = dict(x=data.index, y=series, mode='lines', name=name)
        if color is not None:
            trace_kwargs['line'] = dict(color=color)
        fig.add_trace(go.Scatter(**trace_kwargs), row=row, col=1)

    fig.update_layout(height=900, width=1200, title_text=f'{ticker} Rolling Statistics')
    return fig
# Streamlit app
# Page setup and sidebar inputs. Streamlit draws widgets in call order, so the
# order of the statements below is the order of the controls in the sidebar.
st.set_page_config(page_title="Asset Price Descriptive Statistics", layout="wide")
st.title('Asset Price Descriptive Statistics')
st.sidebar.title('Input Parameters')

# Sidebar for "How to Use" instructions
with st.sidebar.expander("How to Use", expanded=False):
    st.write("""
### How to Use:
1. Enter the stock or crypto ticker, start date, and end date.
2. Choose the desired analysis method from the available options.
3. Adjust the parameters specific to the selected method if needed.
4. Click 'Fetch Data' to load the data and run the analysis.
5. The results and visualizations will appear in the main section of the app.
""")

# Expander for Ticker and Date Input
with st.sidebar.expander("Ticker and Date Input", expanded=True):
    ticker = st.text_input('Enter Stock or Crypto Ticker (e.g., AAPL or BTC-USD)', 'ASML.AS', help="Enter the symbol for the stock or cryptocurrency you want to analyze.")
    start_date = st.date_input('Start Date', pd.to_datetime('2000-01-01'), help="Select the start date for data fetching.")
    # The default end date is tomorrow's date.
    end_date = st.date_input('End Date', pd.to_datetime(pd.Timestamp.now().date() + pd.Timedelta(days=1)), help="Select the end date for data fetching.")

# Expander for Method Selection
with st.sidebar.expander("Method Selection", expanded=True):
    selected = st.radio("Select Method", ["Candlestick Chart", "Returns Distribution", "Probability Plots",
                                          "Time Series Analysis", "ACF and PACF", "Boxplots", "Rolling Statistics"])

# Expander for Method-Specific Parameters
# Each parameter variable is defined only when its method is selected; the
# display code reads it under the same `selected` value.
with st.sidebar.expander("Parameters", expanded=True):
    if selected == "Returns Distribution":
        return_periods = {
            "Daily": st.text_input('Daily Resampling Frequency', 'D', help="Set the frequency for daily returns calculation."),
            "Weekly": st.text_input('Weekly Resampling Frequency', 'W', help="Set the frequency for weekly returns calculation."),
            "Monthly": st.text_input('Monthly Resampling Frequency', 'M', help="Set the frequency for monthly returns calculation."),
        }
    elif selected == "Time Series Analysis":
        # STL needs a seasonal period of at least 2, so 1 is not offered.
        period = st.number_input('STL Decomposition Period', min_value=2, value=252, help="Enter the period for STL decomposition (e.g., 252 for yearly seasonality).")
    elif selected == "ACF and PACF":
        nlags = st.number_input('Number of Lags', min_value=1, value=100, help="Enter the number of lags for ACF and PACF plots.")
    elif selected == "Rolling Statistics":
        window_size = st.number_input('Rolling Window Size', min_value=1, value=20, help="Enter the window size for rolling calculations.")
        quantile_value = st.number_input('Quantile Value', min_value=0.0, max_value=1.0, value=0.5, help="Set the quantile value for rolling quantile calculation.")
# Fetch data
# Create the button before testing it. Written as the right-hand side of an
# `or`, the call is skipped whenever no data is cached yet, so the button would
# not be drawn on the first run or for as long as the first download keeps
# coming back empty.
fetch_requested = st.sidebar.button('Fetch Data')

# Download on an explicit click, or automatically while nothing is cached.
if fetch_requested or 'data' not in st.session_state:
    data = fetch_stock_data(ticker, start_date, end_date)
    if data.empty:
        st.error(f"No data returned for {ticker} from {start_date} to {end_date}")
    else:
        # Only a non-empty result replaces the cached frame.
        st.session_state.data = data
# Render the selected analysis once a non-empty frame is cached in the session.
# Each branch writes its explanatory text and then draws the figure returned by
# the matching plot_* helper. The method-specific arguments (return_periods,
# period, nlags, window_size, quantile_value) are the sidebar values defined
# under the same `selected` option.
# NOTE(review): `ticker` is the current text-input value while `data` is the
# frame cached by the last successful fetch, so titles can name a symbol that
# has not been fetched yet — confirm whether that is acceptable.
if 'data' in st.session_state and not st.session_state.data.empty:
    data = st.session_state.data
    # Display results based on the selected method
    if selected == "Candlestick Chart":
        st.markdown("""### Candlestick Chart
The candlestick chart visually represents the open, high, low, and close prices of a stock or crypto for each day.
""")
        with st.expander("Method Description", expanded=False):
            st.markdown("""
**Components of a Candlestick:**
- **Open**: The price at which a stock or crypto started trading at the beginning of the time period.
- **High**: The highest price reached during the time period.
- **Low**: The lowest price reached during the time period.
- **Close**: The price at which a stock or crypto stopped trading at the end of the time period.
**Candlestick Structure:**
- **Body**: The area between the open and close prices.
- If the close is higher than the open, the body is typically green or white, indicating a bullish sentiment.
- If the close is lower than the open, the body is typically red or black, indicating a bearish sentiment.
- **Wicks (Shadows)**: The lines extending above and below the body.
- The upper wick represents the range from the high to the close (or open, if it's higher).
- The lower wick represents the range from the low to the open (or close, if it's lower).
**How to use:**
1. Enter the stock or crypto ticker, start date, and end date.
2. Click 'Fetch Data' to load the data.
3. The chart will display the candlestick chart for the selected period.
**Results:**
The chart shows the candlestick representation of the stock or crypto's price movements over time, helping to identify trends, reversals, and patterns such as dojis, hammers, and engulfing patterns.
""")
        fig = plot_candlestick(data, ticker)
        st.plotly_chart(fig)
    elif selected == "Returns Distribution":
        st.markdown("""### Returns Distribution
This analysis shows the distribution of daily, weekly, and monthly returns. It helps understand the asset return characteristics by visualizing the frequency and magnitude of price changes over different time periods.
""")
        with st.expander("Method Description", expanded=False):
            st.markdown("""
**Components:**
- **Daily Returns**: Calculated as the percentage change in the closing price from one day to the next.
- **Weekly Returns**: Calculated as the percentage change in the closing price from the last trading day of one week to the last trading day of the next week.
- **Monthly Returns**: Calculated as the percentage change in the closing price from the last trading day of one month to the last trading day of the next month.
""")
            st.latex(r'''
\text{Daily Return} (R_d) = \frac{P_{\text{close}, t} - P_{\text{close}, t-1}}{P_{\text{close}, t-1}}
''')
            st.latex(r'''
\text{Weekly Return} (R_w) = \frac{P_{\text{close}, \text{week}_t} - P_{\text{close}, \text{week}_{t-1}}}{P_{\text{close}, \text{week}_{t-1}}}
''')
            st.latex(r'''
\text{Monthly Return} (R_m) = \frac{P_{\text{close}, \text{month}_t} - P_{\text{close}, \text{month}_{t-1}}}{P_{\text{close}, \text{month}_{t-1}}}
''')
            st.markdown("""
Where:
- `P_close_t` is the closing price at time `t`.
- `P_close_(t-1)` is the closing price at time `t-1`.
- `P_close_week_t` is the closing price at the end of the current week.
- `P_close_week_(t-1)` is the closing price at the end of the previous week.
- `P_close_month_t` is the closing price at the end of the current month.
- `P_close_month_(t-1)` is the closing price at the end of the previous month
""")
        # return_periods maps each period label to the frequency string typed in the sidebar.
        fig = plot_returns_distribution(data, ticker, return_periods)
        st.plotly_chart(fig)
    elif selected == "Probability Plots":
        # In this branch the section heading is written inside the expander.
        with st.expander("Method Description", expanded=False):
            st.markdown("""
### Probability Plots
This analysis shows the normal and t-distribution probability plots of daily returns. It helps check if the returns follow a specific distribution.
""")
            st.markdown("""
**Components:**
- **Normal Probability Plot**: Plots the quantiles of the daily returns against the theoretical quantiles of a normal distribution.
- **T-Distribution Probability Plot**: Plots the quantiles of the daily returns against the theoretical quantiles of a t-distribution with estimated parameters.
**Formulas:**
- **Daily Return** (R_d):
""")
            st.latex(r'''
R_d = \frac{P_{close, t} - P_{close, t-1}}{P_{close, t-1}}
''')
            st.markdown("""
where:
- `P_close_t` is the closing price at time `t`.
- `P_close_(t-1)` is the closing price at time `t-1`.
- **Normal Probability Plot**: Compares the ordered sample values of daily returns to the expected values if the data followed a normal distribution.
- **T-Distribution Probability Plot**: Compares the ordered sample values of daily returns to the expected values if the data followed a t-distribution. The t-distribution is parameterized by degrees of freedom (ν), location parameter (μ), and scale parameter (σ):
""")
            st.latex(r'''
t(x, \nu, \mu, \sigma) = \frac{\Gamma\left(\frac{\nu + 1}{2}\right)}{\sqrt{\nu \pi} \Gamma\left(\frac{\nu}{2}\right)} \left(1 + \frac{(x - \mu)^2}{\nu \sigma^2}\right)^{-\frac{\nu + 1}{2}}
''')
            st.markdown("""
where:
- `Γ` is the gamma function.
- `ν` is the degrees of freedom.
- `μ` is the location parameter.
- `σ` is the scale parameter.
""")
        fig = plot_probability_plots(data, ticker)
        st.plotly_chart(fig)
    elif selected == "Time Series Analysis":
        st.markdown("""### Time Series Analysis
This analysis decomposes the stock or crypto price into trend, seasonal, and residual components.
""")
        with st.expander("Method Description", expanded=False):
            st.markdown("""
**Components:**
- **Trend** (T_t): Represents the long-term movement in the time series.
- **Seasonal** (S_t): Captures the repeating short-term cycle in the data.
- **Residual** (R_t): The remaining component after removing the trend and seasonal effects, representing the noise or irregular component.
**Formulas:**
The time series (Y_t) can be decomposed as:
""")
            st.latex(r'''
Y_t = T_t + S_t + R_t
''')
            st.markdown("""
where:
- `Y_t` is the observed value at time `t`.
- `T_t` is the trend component.
- `S_t` is the seasonal component.
- `R_t` is the residual component.
The Seasonal-Trend decomposition using LOESS is used for this decomposition:
- **LOESS** (Locally Estimated Scatterplot Smoothing): A non-parametric method that fits multiple regressions in local neighborhoods.
""")
        # period is the STL period entered in the sidebar.
        fig = plot_time_series_analysis(data, ticker, period)
        st.plotly_chart(fig)
    elif selected == "ACF and PACF":
        st.markdown("""### ACF and PACF
This analysis shows the autocorrelation and partial autocorrelation functions of the stock or crypto's daily returns. It helps identify the presence of patterns or trends.
""")
        with st.expander("Method Description", expanded=False):
            st.markdown("""
**Autocorrelation Function (ACF):**
The ACF measures the correlation between a time series and its lagged values. It is defined as:
""")
            st.latex(r'''
\rho_k = \frac{\sum_{t=k+1}^{n} (Y_t - \bar{Y})(Y_{t-k} - \bar{Y})}{\sum_{t=1}^{n} (Y_t - \bar{Y})^2}
''')
            st.markdown("""
where:
- `ρ_k` is the autocorrelation at lag `k`.
- `Y_t` is the value at time `t`.
- `Ȳ` is the mean of the series.
**Partial Autocorrelation Function (PACF):**
The PACF measures the correlation between a time series and its lagged values, controlling for the values of the intermediate lags. It is defined as the coefficient `φ_kk` in the linear regression:
""")
            st.latex(r'''
Y_t = \phi_{k1} Y_{t-1} + \phi_{k2} Y_{t-2} + \cdots + \phi_{kk} Y_{t-k} + \epsilon_t
''')
            st.markdown("""
where:
- `φ_kk` is the partial autocorrelation at lag `k`.
- `ε_t` is the white noise error term.
""")
        # nlags is the lag count entered in the sidebar.
        fig = plot_acf_pacf(data, ticker, nlags)
        st.plotly_chart(fig)
    elif selected == "Boxplots":
        # In this branch the section heading is written inside the expander.
        with st.expander("Method Description", expanded=False):
            st.markdown("""
### Boxplots
This analysis shows the effect of the day of the week, month of the year, and year on the stock or crypto's returns. It helps identify patterns based on time periods.
**How it Works:**
- **Day of the Week Effect:**
The boxplot groups returns by each day of the week (e.g., Monday, Tuesday) to identify any patterns or anomalies specific to particular days.
- **Month of the Year Effect:**
The boxplot groups returns by each month to highlight any seasonal effects in the stock or crypto returns.
- **Year Effect:**
The boxplot groups returns by year to observe any long-term trends or changes in performance over the years.
""")
        fig = plot_boxplots(data, ticker)
        st.plotly_chart(fig)
    elif selected == "Rolling Statistics":
        # In this branch the section heading is written inside the expander.
        with st.expander("Method Description", expanded=False):
            st.markdown("""
### Rolling Statistics
This analysis shows the rolling kurtosis, skewness, quantile, and autocorrelation of the stock or crypto's price. It helps understand the dynamic changes in the characteristics.
**How it Works:**
- **Rolling Kurtosis:**
Kurtosis measures the tails' heaviness of the distribution. It indicates the presence of outliers.
""")
            st.latex(r'''
\text{Kurtosis}(X) = \frac{E[(X - \mu)^4]}{\sigma^4}
''')
            st.markdown("""
- **Rolling Skewness:**
Skewness measures the asymmetry of the distribution of returns.
""")
            st.latex(r'''
\text{Skewness}(X) = \frac{E[(X - \mu)^3]}{\sigma^3}
''')
            st.markdown("""
- **Rolling Quantile:**
Quantile indicates the value below which a given percentage of observations fall. For the 50th percentile (median):
""")
            st.latex(r'''
Q_p(X) = \inf \{ x \in \mathbb{R} : F_X(x) \geq p \}
''')
            st.markdown("""
- **Rolling Autocorrelation:**
Autocorrelation measures the correlation of the series with its lagged values.
""")
            st.latex(r'''
\text{Autocorrelation}(k) = \frac{E[(X_t - \mu)(X_{t-k} - \mu)]}{\sigma^2}
''')
        # window_size and quantile_value are the two sidebar inputs for this method.
        fig = plot_rolling_statistics(data, ticker, window_size, quantile_value)
        st.plotly_chart(fig)
# Hide Streamlit's default footer and menu
# The CSS below sets `visibility: hidden` on the `#MainMenu` element and on
# `footer`; it is injected as raw HTML, which is why unsafe_allow_html=True
# is passed to st.markdown.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)