# Space38 / app.py
# QuantumLearner's picture
# Update app.py
# 8d5c2d4 verified
import streamlit as st
import yfinance as yf
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from datetime import datetime, timedelta
# Helper functions remain unchanged (except for yfinance adjustments)
def download_data(tickers, start_date, end_date):
    """Fetch daily OHLCV history for every configured ticker via yfinance.

    ``tickers`` maps a role name ('stock', 'sp500', 'vix') to a ticker
    symbol.  Raises ValueError when a symbol returns nothing or fewer than
    one trading year (252 rows) of data.
    """
    frames = {}
    for role, symbol in tickers.items():
        history = yf.download(symbol, start=start_date, end=end_date, auto_adjust=False)
        # Newer yfinance releases may return MultiIndex (field, ticker) columns;
        # flatten them down to the plain field names.
        if isinstance(history.columns, pd.MultiIndex):
            history.columns = history.columns.get_level_values(0)
        if history.empty:
            raise ValueError(f"No data retrieved for {symbol}")
        if len(history) < 252:  # one trading year of observations minimum
            raise ValueError(f"Insufficient data points for {symbol}. Need at least 252 days.")
        frames[role] = history
    return frames
def calculate_returns_and_volatility(data, rolling_window):
    """Add log returns and annualised rolling volatility to stock and S&P 500 frames.

    The same transformation was previously duplicated for both frames; it is
    now factored into a single private helper.

    Parameters
    ----------
    data : dict
        Must contain 'stock' and 'sp500' DataFrames, each with an
        'Adj Close' column.
    rolling_window : int
        Window (in trading days) for the rolling standard deviation.

    Returns
    -------
    dict
        The same dict with 'Log_Returns' and a volatility column added to
        each frame, and rows containing NaN (warm-up period) dropped.
    """
    def _with_volatility(df, vol_column):
        # One-line purpose: attach log returns and annualised rolling vol,
        # then drop the warm-up rows introduced by shift() and rolling().
        df['Log_Returns'] = np.log(df['Adj Close'] / df['Adj Close'].shift(1))
        # sqrt(252) annualises the daily standard deviation.
        df[vol_column] = df['Log_Returns'].rolling(window=rolling_window).std() * np.sqrt(252)
        return df.dropna()

    data['stock'] = _with_volatility(data['stock'], 'Volatility')
    data['sp500'] = _with_volatility(data['sp500'], 'SP500_Volatility')
    return data
def merge_data(data):
    """Combine stock, S&P 500 and VIX series into one aligned feature frame.

    The VIX level is log-transformed; all series are index-aligned and rows
    with any missing value are dropped.
    """
    stock, sp500, vix = data['stock'], data['sp500'], data['vix']
    combined = stock[['Volatility']].copy()
    # Column order matters downstream: column 0 (Volatility) is the series
    # whose forecast is plotted.
    for column, series in (
        ('SP500', sp500['Adj Close']),
        ('SP500_Volatility', sp500['SP500_Volatility']),
        ('VIX', np.log(vix['Adj Close'])),
        ('SP500_Returns', sp500['Log_Returns']),
        ('Volume', stock['Volume']),
        ('Stock_Returns', stock['Log_Returns']),
    ):
        combined[column] = series
    return combined.dropna()
def check_stationarity_and_difference(df):
    """Make each column of ``df`` stationary *in place* via first differencing.

    Runs an Augmented Dickey-Fuller test on every column; when the p-value
    exceeds 0.05 (the unit-root null cannot be rejected) the column is
    replaced by its first difference.  Stationary columns are left unchanged.

    Notes
    -----
    - Differencing introduces a leading NaN in each transformed column;
      callers are expected to ``dropna()`` afterwards (the call site does).
    - Only a single differencing pass is applied; the differenced series is
      not re-tested.
    - Mutates ``df`` in place and returns None, matching the original
      contract relied on by the caller.
    """
    for column in df.columns:
        p_value = adfuller(df[column].dropna())[1]
        if p_value > 0.05:
            # Non-stationary series; replace with its first difference.
            df[column] = df[column].diff()
def normalize_data(df):
    """Scale every column into [0, 1] with a min-max scaler.

    Returns the scaled frame (same index and columns) together with the
    fitted scaler, so forecasts can later be mapped back to the original
    units via ``inverse_transform``.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    transformed = scaler.fit_transform(df)
    scaled = pd.DataFrame(transformed, index=df.index, columns=df.columns)
    return scaled, scaler
def fit_var_model(scaled_data, max_lags=30):
    """Fit a VAR model, selecting the lag order that minimises AIC.

    Returns the fitted results object together with the chosen lag, which is
    also needed later to slice the forecasting history.
    """
    var = VAR(scaled_data)
    best_lag = var.select_order(maxlags=max_lags).aic
    fitted = var.fit(best_lag)
    return fitted, best_lag
def forecast_future_values(results, scaled_data, scaler, steps, optimal_lag):
    """Forecast ``steps`` periods ahead with 95% and 68% interval bounds.

    The point forecast is taken from the first interval call (both calls
    produce the same central path); all outputs are mapped back to original
    units with the fitted scaler.
    """
    # The VAR needs exactly `optimal_lag` trailing observations as context.
    history = scaled_data.values[-optimal_lag:]
    point, lower_95, upper_95 = results.forecast_interval(history, steps=steps, alpha=0.05)
    _, lower_68, upper_68 = results.forecast_interval(history, steps=steps, alpha=0.32)
    unscale = scaler.inverse_transform
    return (unscale(point), unscale(lower_95), unscale(upper_95),
            unscale(lower_68), unscale(upper_68))
# Plotting functions remain unchanged
def plot_forecast(merged_data, future_dates, volatility_predictions, lower_volatility_95, upper_volatility_95, lower_volatility_68, upper_volatility_68):
    """Plot historical volatility plus the forecast with 68%/95% shaded bands."""
    fig = go.Figure()
    # Historical series first so the forecast elements render on top of it.
    fig.add_trace(go.Scatter(x=merged_data.index, y=merged_data['Volatility'],
                             mode='lines', name='Historical Volatility'))
    # Each band: the upper trace is drawn first, the lower fills up to it.
    bands = (
        (upper_volatility_95, lower_volatility_95, 'lightgray', '95% CI Upper', '95% CI Lower'),
        (upper_volatility_68, lower_volatility_68, 'blue', '68% CI Upper', '68% CI Lower'),
    )
    for upper, lower, color, upper_name, lower_name in bands:
        fig.add_trace(go.Scatter(x=future_dates, y=upper, fill=None,
                                 mode='lines', line_color=color, name=upper_name))
        fig.add_trace(go.Scatter(x=future_dates, y=lower, fill='tonexty',
                                 mode='lines', line_color=color, name=lower_name))
    # Point forecast drawn last as a thick dotted orange line.
    fig.add_trace(go.Scatter(x=future_dates, y=volatility_predictions, mode='lines',
                             line_color='orange', name='Predicted Volatility',
                             line=dict(dash='dot', width=4)))
    fig.update_layout(title='Predicted Volatility with Confidence Intervals',
                      xaxis_title='Date', yaxis_title='Volatility',
                      template='plotly_white')
    return fig
def plot_extended_forecast(forecast_data_extended, future_dates, volatility_predictions):
    """
    Plot extended actual historical volatility and predicted future volatility using Plotly.
    """
    # Trim both sequences to their common length so the traces line up.
    common = min(len(future_dates), len(volatility_predictions))
    future_dates = future_dates[:common]
    volatility_predictions = volatility_predictions[:common]
    fig = go.Figure()
    # Realised volatility over the extended window.
    fig.add_trace(go.Scatter(x=forecast_data_extended.index,
                             y=forecast_data_extended['Volatility'],
                             mode='lines', name='Extended Historical Volatility'))
    # Model forecast, dashed to distinguish it from realised data.
    fig.add_trace(go.Scatter(x=future_dates, y=volatility_predictions,
                             mode='lines', name='Predicted Future Volatility',
                             line=dict(dash='dash')))
    fig.update_layout(title='Predicted Volatility with Extended Actual Data',
                      xaxis_title='Date',
                      yaxis_title='Volatility',
                      template='plotly_white')
    return fig
def calculate_performance_metrics(forecast_data_extended, future_dates, volatility_predictions):
    """
    Compare predicted volatility with realised volatility over the forecast
    window and return the error metrics as a markdown string.
    """
    # Normalise the forecast dates to Timestamps so .loc alignment works.
    dates = pd.to_datetime(future_dates)
    predicted = pd.Series(volatility_predictions, index=dates,
                          name='Predicted Volatility')
    # Realised volatility for exactly the forecasted dates.
    realised = forecast_data_extended.loc[dates, 'Volatility']
    actual = pd.Series(realised.values, index=realised.index,
                       name='Actual Volatility')
    # Inner alignment keeps only dates present in both series.
    results_df = pd.concat([actual, predicted], axis=1, join='inner')
    y_true = results_df['Actual Volatility']
    y_pred = results_df['Predicted Volatility']
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = mean_absolute_percentage_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return f"""
**RMSE**: {rmse:.4f}
**MAPE**: {mape:.2%}
**MAE**: {mae:.4f}
**MSE**: {mse:.4f}
**R²**: {r2:.4f}
"""
def plot_residuals_plotly(results):
    """
    Line-plot the in-sample residuals of every series in the fitted VAR model.
    """
    residuals = results.resid
    fig = go.Figure()
    # One trace per modelled series; the legend is hidden below, the names
    # still show on hover.
    traces = [
        go.Scatter(x=residuals.index, y=residuals[name], mode='lines',
                   name=f'Residuals: {name}')
        for name in residuals.columns
    ]
    for trace in traces:
        fig.add_trace(trace)
    fig.update_layout(title='Residuals of Model',
                      xaxis_title='Date', yaxis_title='Residuals',
                      template='plotly_white', showlegend=False)
    return fig
def calculate_metrics_and_plot_errors_plotly(forecast_data_extended, future_dates, volatility_predictions):
    """
    Build a two-panel Plotly figure of prediction quality: a predicted-vs-
    actual scatter (with the perfect-prediction diagonal) on top, and the
    prediction error over time below.
    """
    # Align realised and predicted volatility on the forecast dates.
    dates = pd.to_datetime(future_dates)
    predicted = pd.DataFrame({
        'Date': dates,
        'Predicted Volatility': volatility_predictions
    }).set_index('Date')
    realised = forecast_data_extended.loc[dates, 'Volatility']
    actual = pd.DataFrame({
        'Date': realised.index,
        'Actual Volatility': realised.values
    }).set_index('Date')
    results_df = actual.join(predicted, how='inner')
    results_df['Error'] = results_df['Actual Volatility'] - results_df['Predicted Volatility']

    fig = make_subplots(rows=2, cols=1,
                        subplot_titles=("Scatter Plot of Predicted vs Actual Volatility",
                                        "Prediction Error Over Time"))
    # Panel 1: predicted vs actual scatter.
    fig.add_trace(
        go.Scatter(x=results_df['Actual Volatility'],
                   y=results_df['Predicted Volatility'],
                   mode='markers', name='Predicted vs Actual'),
        row=1, col=1)
    # y = x reference line: points on it would be perfectly predicted.
    lo = min(results_df['Actual Volatility'].min(), results_df['Predicted Volatility'].min())
    hi = max(results_df['Actual Volatility'].max(), results_df['Predicted Volatility'].max())
    fig.add_trace(
        go.Scatter(x=[lo, hi], y=[lo, hi], mode='lines',
                   name='Perfect Prediction',
                   line=dict(dash='dash', color='red')),
        row=1, col=1)
    # Panel 2: error through time.
    fig.add_trace(
        go.Scatter(x=results_df.index, y=results_df['Error'],
                   mode='lines+markers', name='Prediction Error'),
        row=2, col=1)
    fig.update_layout(height=700, title="Model Performance: Prediction Errors",
                      template='plotly_white')
    fig.update_xaxes(title_text='Actual Volatility', row=1, col=1)
    fig.update_yaxes(title_text='Predicted Volatility', row=1, col=1)
    fig.update_xaxes(title_text='Date', row=2, col=1)
    fig.update_yaxes(title_text='Error (Actual - Predicted)', row=2, col=1)
    return fig
def extended_forecast_evaluation(tickers, rolling_window, forecast_start_date,
                                 forecast_end_date, future_dates, volatility_predictions,
                                 extra_days=None):
    """
    Build a series of realised (actual) volatility covering the forecast window.

    Downloads actual stock data from well before ``forecast_start_date`` (to
    warm up the rolling window) through ``forecast_end_date`` plus
    ``extra_days``, computes rolling annualised volatility, and merges it with
    the forecast horizon so predictions can be compared against what actually
    happened.

    Parameters
    ----------
    extra_days : int, optional
        Days of actual data beyond the forecast end to include.  Defaults to
        the module-level sidebar value, preserving backward compatibility
        with callers that relied on the old implicit global read.

    Raises
    ------
    ValueError
        If no data (or fewer than ``rolling_window`` rows) is retrieved.
    """
    if extra_days is None:
        # Backward-compatible fallback: the original implementation read the
        # Streamlit sidebar value straight from module scope.
        extra_days = globals()['extra_days']
    # Start 3x the rolling window earlier so the first forecast date already
    # has a fully populated rolling volatility.
    extended_start_date = (forecast_start_date - timedelta(days=rolling_window * 3)).strftime('%Y-%m-%d')
    # Extended end date includes extra days for comparison.
    extended_end_date = forecast_end_date + timedelta(days=extra_days)
    extended_actual_data = yf.download(tickers['stock'], start=extended_start_date,
                                       end=extended_end_date.strftime('%Y-%m-%d'),
                                       auto_adjust=False)
    # Flatten possible MultiIndex (field, ticker) columns from newer yfinance.
    if isinstance(extended_actual_data.columns, pd.MultiIndex):
        extended_actual_data.columns = extended_actual_data.columns.get_level_values(0)
    if extended_actual_data.empty:
        raise ValueError(f"No extended data retrieved for {tickers['stock']}")
    if len(extended_actual_data) < rolling_window:
        raise ValueError(f"Insufficient extended data points for {tickers['stock']}. Need at least {rolling_window} days.")
    # Realised rolling volatility, annualised with sqrt(252).
    extended_actual_data['Returns'] = extended_actual_data['Adj Close'].pct_change()
    extended_actual_data['Volatility'] = extended_actual_data['Returns'].rolling(window=rolling_window).std() * np.sqrt(252)
    # Placeholder rows for the forecast horizon; filled forward from the last
    # known actual volatility after the concat below.
    forecast_horizon = pd.DataFrame(index=future_dates)
    forecast_horizon['Volatility'] = np.nan
    forecast_data_extended = pd.concat([extended_actual_data, forecast_horizon], axis=0).sort_index()
    # .ffill() replaces the deprecated fillna(method='ffill') (pandas 2.x).
    forecast_data_extended['Volatility'] = forecast_data_extended['Volatility'].ffill()
    forecast_data_extended = forecast_data_extended.dropna(subset=['Volatility'])
    return forecast_data_extended
# ---------------------------------------------------------------------------
# Streamlit scaffolding: global page layout, titles, and the sidebar controls
# shared by both pages.  NOTE(review): original indentation was lost in a
# paste; structure below restores the intended with-block nesting — confirm.
# ---------------------------------------------------------------------------
# Set page configuration for a wide layout
st.set_page_config(layout="wide")
st.title("Volatility Forecasting Tool")
st.sidebar.title("Input Parameters")
# How-to-use instructions in an expander
with st.sidebar.expander("How to Use the App", expanded=False):
    st.markdown("""
**Step 1**: Select the page you want to use (Real-time Predictions or Model Performance).
**Step 2**: Enter the stock ticker symbol you wish to analyze.
**Step 3**: Adjust the start and end dates for your analysis.
**Step 4**: Configure additional parameters like rolling window and forecast horizon.
**Step 5**: Click the **Run Model** button to generate the forecasts and view the results.
""")
# Pages
page = st.sidebar.radio("Choose Page", ("Real-time Predictions", "Model Performance"))
# Common Sidebar inputs within an expander (opened by default)
with st.sidebar.expander("Ticker and Date Selection", expanded=True):
    stock_ticker = st.text_input("Stock Ticker", value="ASML", help="Enter the ticker symbol of the stock you want to analyze (e.g., AAPL for Apple Inc.).")
    # Hide VIX and SP500 tickers by using default values internally
    tickers = {"stock": stock_ticker, "sp500": "^GSPC", "vix": "^VIX"}
# Additional parameters within another expander (opened by default)
with st.sidebar.expander("Model Parameters", expanded=True):
    rolling_window = st.number_input(
        "Rolling Window",
        min_value=1,
        value=21,
        help="The number of days to use for calculating the rolling volatility."
    )
    n_days = st.number_input(
        "Forecast Horizon (Days)",
        min_value=1,
        value=30,
        help="The number of future days over which to forecast volatility."
    )
    # extra_days is only defined on the Model Performance page, where the
    # forecast is back-tested against realised data.  Code that reads it
    # must only run on that page.
    if page == "Model Performance":
        extra_days = st.number_input(
            "Extra Days of Actual Data for Comparison",
            min_value=1,
            value=15,
            help="Additional days of actual future data to include for comparison with the forecast."
        )
# Separate Start and End Dates for each page within the expander.
# Real-time Predictions page: forecast forward from today's data.
if page == "Real-time Predictions":
    with st.sidebar.expander("Date Range Selection", expanded=True):
        start_date_rt = st.date_input(
            "Start Date",
            value=datetime(2020, 1, 1),
            key='start_date_rt',
            help="The start date for getting the historical data."
        )
        end_date_rt = st.date_input(
            "End Date",
            value=datetime.now(),
            key='end_date_rt',
            max_value=datetime.now(),
            help="The end date for getting the historical data."
        )
    # Context description in the main body.  Fixed: "This apps" typo and the
    # inaccurate "deep learning" claim — the model fitted below is a VAR.
    st.markdown("""
### Real-time Predictions
This app allows you to generate real-time forecasts of stock price volatility using a multivariate vector autoregression (VAR) model with external factors. Volatility is calculated as the rolling standard deviation of the stock's daily log returns. The model provides confidence intervals (68% and 95%) to represent uncertainty in the predictions.
""")
    # Run button
    run_button = st.sidebar.button("Run Model", key='run_button_rt')
    # Placeholder for plots
    plot_placeholder = st.empty()
    if run_button:
        try:
            with st.spinner("Downloading data and processing (This will take a few seconds)..."):
                data = download_data(tickers, start_date_rt, end_date_rt)
                data = calculate_returns_and_volatility(data, rolling_window)
                merged_data = merge_data(data)
                # Preprocess the data
                scaled_data, scaler = normalize_data(merged_data)
                # Fit the VAR model
                results, optimal_lag = fit_var_model(scaled_data)
                # Forecast future values
                forecast_original, lower_95_original, upper_95_original, lower_68_original, upper_68_original = forecast_future_values(
                    results, scaled_data, scaler, n_days, optimal_lag)
                # Column 0 of the merged frame is the stock's volatility.
                volatility_predictions = forecast_original[:, 0]
                lower_volatility_95 = lower_95_original[:, 0]
                upper_volatility_95 = upper_95_original[:, 0]
                lower_volatility_68 = lower_68_original[:, 0]
                upper_volatility_68 = upper_68_original[:, 0]
                # Business-day dates for the forecast horizon.
                future_dates = pd.date_range(start=end_date_rt + timedelta(days=1), periods=n_days, freq='B')
                # Display the forecast plot
                forecast_fig = plot_forecast(merged_data, future_dates, volatility_predictions,
                                             lower_volatility_95, upper_volatility_95,
                                             lower_volatility_68, upper_volatility_68)
                # Store results in session_state so Streamlit reruns can re-render
                st.session_state['rt_results'] = {'forecast_fig': forecast_fig}
                # Display the plot using the placeholder
                with plot_placeholder:
                    st.subheader("Forecasted Volatility")
                    st.plotly_chart(forecast_fig)
        except Exception as e:
            st.error(f"An error occurred while running the analysis: {e}")
    elif 'rt_results' in st.session_state:
        # Re-display the stored plot on reruns where the button wasn't pressed.
        with plot_placeholder:
            st.subheader("Forecasted Volatility")
            st.plotly_chart(st.session_state['rt_results']['forecast_fig'])
elif page == "Model Performance":
with st.sidebar.expander("Date Range Selection", expanded=True):
# Model Performance page date inputs
start_date_mp = st.date_input(
"Start Date",
value=datetime(2020, 1, 1),
key='start_date_mp',
help="The start date for downloading historical data."
)
# Calculate the maximum allowable end date for model performance
today = datetime.now().date()
max_end_date_mp = today - timedelta(days=int(n_days + extra_days))
end_date_mp = st.date_input(
"End Date",
value=max_end_date_mp,
max_value=max_end_date_mp,
key='end_date_mp',
help="The end date for training the model. Cannot exceed the maximum allowed date."
)
# Context description in the main body
st.markdown("""
### Model Performance
Here you assess how well the model forecasts volatility by comparing predicted values with actual historical (unseen) data. djust the parameters in the sidebar and click **Run Model** to assess performance.
""")
with st.expander("The following analyses are performed", expanded=False):
st.markdown("""
1. **Predicted vs Actual Volatility**: The app compares the predicted stock volatility with actual volatility over a given time period. Volatility is calculated as the rolling standard deviation of daily log returns. The forecasted values are plotted alongside actual values to visualize performance.
2. **Residual Analysis**: Residuals represent the difference between the actual and predicted values. A plot of the residuals helps identify patterns or systematic errors in the predictions, such as under or overestimation.
3. **Error Metrics**: The app calculates several error metrics to quantify the accuracy of the predictions:
- **RMSE (Root Mean Squared Error)**: Measures the average magnitude of errors in the predictions, penalizing larger errors.
- **MAE (Mean Absolute Error)**: Represents the average absolute difference between predicted and actual volatility.
- **MAPE (Mean Absolute Percentage Error)**: Shows the prediction accuracy as a percentage, providing a relative measure of performance.
- **R² (R-squared)**: Indicates how well the predicted values explain the variability in the actual volatility, with a value closer to 1 indicating better performance.
4. **Confidence Intervals**: The model provides 68% and 95% confidence intervals to quantify uncertainty around the predictions. Wider intervals indicate more uncertainty, while narrower ones suggest more confidence in the forecasts.
**Instructions:**
- **Adjust Parameters**: Set the rolling window, forecast horizon, and extra days for comparison in the sidebar.
- **Run the Model**: Click **Run Model** to download data, train the model, and evaluate its performance using actual market data.
- **Evaluate Results**: The app visualizes the results with performance metrics, residual plots, and error analysis to help gauge how well the model performs.
""")
# Run button
run_button = st.sidebar.button("Run Model", key='run_button_mp')
# Placeholders for plots and metrics
forecast_placeholder = st.empty()
extended_forecast_placeholder = st.empty()
metrics_placeholder = st.empty()
residual_placeholder = st.empty()
error_placeholder = st.empty()
if run_button:
try:
with st.spinner("Downloading data and processing predictions (This will take a few seconds)..."):
# Convert end_date_mp to datetime if necessary
adjusted_end_date = pd.to_datetime(end_date_mp)
# Extended end date includes n_days forecast plus extra_days for comparison
extended_end_date = adjusted_end_date + timedelta(days=n_days + extra_days)
data = download_data(tickers, start_date_mp, extended_end_date)
data = calculate_returns_and_volatility(data, rolling_window)
merged_data = merge_data(data)
# Ensure that the data is up to adjusted_end_date for training
merged_data_train = merged_data[merged_data.index <= adjusted_end_date]
# Check stationarity and difference if necessary
merged_data_diff = merged_data_train.copy()
check_stationarity_and_difference(merged_data_diff)
merged_data_diff = merged_data_diff.dropna()
# Normalize data
scaled_data, scaler = normalize_data(merged_data_diff)
# Fit VAR model
results, optimal_lag = fit_var_model(scaled_data)
# Forecast future values
forecast_original, lower_95_original, upper_95_original, lower_68_original, upper_68_original = forecast_future_values(
results, scaled_data, scaler, n_days, optimal_lag)
volatility_predictions = forecast_original[:, 0]
lower_volatility_95 = lower_95_original[:, 0]
upper_volatility_95 = upper_95_original[:, 0]
lower_volatility_68 = lower_68_original[:, 0]
upper_volatility_68 = upper_68_original[:, 0]
# Generate future dates
future_dates = pd.date_range(start=adjusted_end_date + timedelta(days=1), periods=n_days, freq='B')
# Extended forecast evaluation
forecast_start_date = future_dates[0]
forecast_end_date = future_dates[-1]
forecast_data_extended = extended_forecast_evaluation(
tickers, rolling_window, forecast_start_date,
forecast_end_date, future_dates, volatility_predictions)
# Plot forecast with confidence intervals
forecast_fig = plot_forecast(merged_data_train, future_dates, volatility_predictions,
lower_volatility_95, upper_volatility_95,
lower_volatility_68, upper_volatility_68)
# Plot extended forecast comparison
extended_forecast_fig = plot_extended_forecast(forecast_data_extended, future_dates, volatility_predictions)
# Calculate and display performance metrics
performance_metrics = calculate_performance_metrics(forecast_data_extended, future_dates, volatility_predictions)
# Plot residuals using Plotly
residual_fig = plot_residuals_plotly(results)
# Calculate metrics and plot errors using Plotly
error_fig = calculate_metrics_and_plot_errors_plotly(forecast_data_extended, future_dates, volatility_predictions)
# Store results in session_state
st.session_state['mp_results'] = {
'forecast_fig': forecast_fig,
'extended_forecast_fig': extended_forecast_fig,
'performance_metrics': performance_metrics,
'residual_fig': residual_fig,
'error_fig': error_fig
}
# Display plots and metrics using placeholders
with forecast_placeholder:
st.subheader("Forecast with Confidence Intervals")
st.plotly_chart(forecast_fig)
with extended_forecast_placeholder:
st.subheader("Extended Forecast Evaluation")
st.plotly_chart(extended_forecast_fig)
with metrics_placeholder:
st.markdown("#### Performance Metrics")
st.markdown(performance_metrics)
with residual_placeholder:
st.subheader("Residuals of Model")
st.plotly_chart(residual_fig)
with error_placeholder:
st.subheader("Prediction Errors")
st.plotly_chart(error_fig)
except Exception as e:
st.error(f"An error occurred while running the analysis: {e}")
elif 'mp_results' in st.session_state:
# Display stored results using placeholders
with forecast_placeholder:
st.subheader("Forecast with Confidence Intervals")
st.plotly_chart(st.session_state['mp_results']['forecast_fig'])
with extended_forecast_placeholder:
st.subheader("Extended Forecast Evaluation")
st.plotly_chart(st.session_state['mp_results']['extended_forecast_fig'])
with metrics_placeholder:
st.markdown("#### Performance Metrics")
st.markdown(st.session_state['mp_results']['performance_metrics'])
with residual_placeholder:
st.subheader("Residuals of Model")
st.plotly_chart(st.session_state['mp_results']['residual_fig'])
with error_placeholder:
st.subheader("Prediction Errors")
st.plotly_chart(st.session_state['mp_results']['error_fig'])
# Hide Streamlit's default chrome (hamburger menu and footer) via injected CSS.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html is required for raw <style> markup to be rendered.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)