"""Streamlit app that forecasts a stock's rolling volatility with a VAR model.

The app has two pages:

* "Real-time Predictions" fits the model on data up to the chosen end date and
  plots the forecast with 68% and 95% intervals.
* "Model Performance" trains on data up to a past end date, forecasts forward,
  and compares the forecast with the volatility that was actually realised.
"""

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import streamlit as st
import yfinance as yf
from plotly.subplots import make_subplots
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

# Annualisation factor applied to the daily standard deviation of returns.
TRADING_DAYS_PER_YEAR = 252
# Minimum number of downloaded rows required per ticker (about one year).
MIN_HISTORY_ROWS = 252
# Default number of extra calendar days of realised data used for comparison.
DEFAULT_EXTRA_DAYS = 15


# ---------------------------------------------------------------------------
# Data helpers
# ---------------------------------------------------------------------------

def _flatten_columns(df):
    """Replace MultiIndex columns (as yfinance may return) by their first level."""
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    return df


def _log_returns(prices):
    """Return the one-period log returns of a price series."""
    return np.log(prices / prices.shift(1))


def download_data(tickers, start_date, end_date):
    """Download price history for every ticker in ``tickers``.

    Args:
        tickers: Mapping of a logical name (e.g. ``'stock'``) to a ticker symbol.
        start_date: First date to request from yfinance.
        end_date: End date passed to yfinance.

    Returns:
        Dict mapping each logical name to its downloaded DataFrame.

    Raises:
        ValueError: If a ticker returns no rows or fewer than 252 rows.
    """
    data = {}
    for name, ticker in tickers.items():
        df = _flatten_columns(
            yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
        )
        if df.empty:
            raise ValueError(f"No data retrieved for {ticker}")
        # One year of rows is required for a meaningful volatility estimate.
        if len(df) < MIN_HISTORY_ROWS:
            raise ValueError(f"Insufficient data points for {ticker}. Need at least 252 days.")
        data[name] = df
    return data


def _with_returns_and_volatility(prices, rolling_window, volatility_column):
    """Return a copy of ``prices`` with log returns and annualised rolling volatility.

    Rows containing any NaN (including the warm-up rows of the rolling window)
    are dropped.
    """
    enriched = prices.copy()
    enriched['Log_Returns'] = _log_returns(enriched['Adj Close'])
    enriched[volatility_column] = (
        enriched['Log_Returns'].rolling(window=rolling_window).std()
        * np.sqrt(TRADING_DAYS_PER_YEAR)
    )
    return enriched.dropna()


def calculate_returns_and_volatility(data, rolling_window):
    """Add log returns and rolling volatility to the stock and S&P 500 frames.

    The ``'stock'`` frame gains ``Log_Returns`` and ``Volatility``; the
    ``'sp500'`` frame gains ``Log_Returns`` and ``SP500_Volatility``. The
    ``data`` dict is updated in place and also returned.
    """
    data['stock'] = _with_returns_and_volatility(data['stock'], rolling_window, 'Volatility')
    data['sp500'] = _with_returns_and_volatility(data['sp500'], rolling_window, 'SP500_Volatility')
    return data


def merge_data(data):
    """Combine the model inputs into one DataFrame indexed like the stock data.

    ``Volatility`` is deliberately the first column: callers read the forecast
    for the target series from column 0.
    """
    merged_data = data['stock'][['Volatility']].copy()
    merged_data['SP500'] = data['sp500']['Adj Close']
    merged_data['SP500_Volatility'] = data['sp500']['SP500_Volatility']
    merged_data['VIX'] = np.log(data['vix']['Adj Close'])
    merged_data['SP500_Returns'] = data['sp500']['Log_Returns']
    merged_data['Volume'] = data['stock']['Volume']
    merged_data['Stock_Returns'] = data['stock']['Log_Returns']
    return merged_data.dropna()


def check_stationarity_and_difference(df, significance=0.05):
    """Difference, in place, every column that fails the ADF stationarity test.

    Args:
        df: DataFrame whose columns are tested; modified in place.
        significance: ADF p-value above which a column is treated as
            non-stationary and replaced by its first difference.

    Returns:
        List of the column names that were differenced, so that callers can
        convert forecasts of those columns back to levels.
    """
    differenced_columns = []
    for column in df.columns:
        p_value = adfuller(df[column].dropna())[1]
        if p_value > significance:
            df[column] = df[column].diff()
            differenced_columns.append(column)
    return differenced_columns


def undifference_forecast(last_level, increments):
    """Turn forecast first differences back into levels.

    Args:
        last_level: Last observed level of the series before the forecast.
        increments: Forecast first differences, one per step.

    Returns:
        Array of forecast levels, ``last_level`` plus the running sum.
    """
    return last_level + np.cumsum(np.asarray(increments, dtype=float))


def normalize_data(df):
    """Scale every column to the range [0, 1].

    Returns:
        Tuple ``(scaled_frame, fitted_scaler)``.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
    return scaled_data, scaler


def fit_var_model(scaled_data, max_lags=30):
    """Fit a VAR model using the lag order that minimises AIC.

    Returns:
        Tuple ``(fitted_results, optimal_lag)``.
    """
    model = VAR(scaled_data)
    lag_order_results = model.select_order(maxlags=max_lags)
    # AIC may select 0 lags; ``values[-0:]`` would then hand the whole history
    # to the forecaster, so always keep at least one lag.
    optimal_lag = max(int(lag_order_results.aic), 1)
    results = model.fit(optimal_lag)
    return results, optimal_lag


def forecast_future_values(results, scaled_data, scaler, steps, optimal_lag):
    """Forecast ``steps`` periods ahead and map the results back to original units.

    Returns:
        Tuple ``(forecast, lower_95, upper_95, lower_68, upper_68)`` of arrays
        with one row per step and one column per model variable.
    """
    lagged_values = scaled_data.values[-optimal_lag:]
    forecast_95, lower_95, upper_95 = results.forecast_interval(
        lagged_values, steps=steps, alpha=0.05)
    # The point forecast is the same for both alphas; only the bounds are used here.
    _, lower_68, upper_68 = results.forecast_interval(
        lagged_values, steps=steps, alpha=0.32)

    return (
        scaler.inverse_transform(forecast_95),
        scaler.inverse_transform(lower_95),
        scaler.inverse_transform(upper_95),
        scaler.inverse_transform(lower_68),
        scaler.inverse_transform(upper_68),
    )


# ---------------------------------------------------------------------------
# Plotting and metrics
# ---------------------------------------------------------------------------

def plot_forecast(merged_data, future_dates, volatility_predictions,
                  lower_volatility_95, upper_volatility_95,
                  lower_volatility_68, upper_volatility_68):
    """Plot historical volatility, the forecast, and its 68% / 95% intervals."""
    fig = go.Figure()

    # Historical volatility
    fig.add_trace(go.Scatter(x=merged_data.index, y=merged_data['Volatility'],
                             mode='lines', name='Historical Volatility'))

    # 95% interval: the lower trace fills up to the upper trace added just before it.
    fig.add_trace(go.Scatter(x=future_dates, y=upper_volatility_95, fill=None, mode='lines',
                             line_color='lightgray', name='95% CI Upper'))
    fig.add_trace(go.Scatter(x=future_dates, y=lower_volatility_95, fill='tonexty', mode='lines',
                             line_color='lightgray', name='95% CI Lower'))

    # 68% interval
    fig.add_trace(go.Scatter(x=future_dates, y=upper_volatility_68, fill=None, mode='lines',
                             line_color='blue', name='68% CI Upper'))
    fig.add_trace(go.Scatter(x=future_dates, y=lower_volatility_68, fill='tonexty', mode='lines',
                             line_color='blue', name='68% CI Lower'))

    # Point forecast; all line styling lives in one dict instead of being split
    # between ``line_color`` and ``line``.
    fig.add_trace(go.Scatter(x=future_dates, y=volatility_predictions, mode='lines',
                             name='Predicted Volatility',
                             line=dict(color='orange', dash='dot', width=4)))

    fig.update_layout(title='Predicted Volatility with Confidence Intervals',
                      xaxis_title='Date', yaxis_title='Volatility', template='plotly_white')
    return fig


def plot_extended_forecast(forecast_data_extended, future_dates, volatility_predictions):
    """Plot realised volatility over the extended period together with the forecast."""
    # Trim both inputs to their common length so the trace is well-formed.
    common_length = min(len(future_dates), len(volatility_predictions))
    future_dates = future_dates[:common_length]
    volatility_predictions = volatility_predictions[:common_length]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=forecast_data_extended.index,
                             y=forecast_data_extended['Volatility'],
                             mode='lines', name='Extended Historical Volatility'))
    fig.add_trace(go.Scatter(x=future_dates, y=volatility_predictions, mode='lines',
                             name='Predicted Future Volatility', line=dict(dash='dash')))
    fig.update_layout(title='Predicted Volatility with Extended Actual Data',
                      xaxis_title='Date', yaxis_title='Volatility', template='plotly_white')
    return fig


def _align_actual_and_predicted(forecast_data_extended, future_dates, volatility_predictions):
    """Return a frame of actual and predicted volatility on the forecast dates.

    Forecast dates without an actual value are dropped, so a missing date does
    not raise ``KeyError``; duplicated index labels in the actual data are
    collapsed so that no date is counted twice.

    Raises:
        ValueError: If no forecast date has an actual volatility value.
    """
    forecast_index = pd.DatetimeIndex(pd.to_datetime(future_dates))
    predicted = pd.Series(np.asarray(volatility_predictions), index=forecast_index,
                          name='Predicted Volatility')

    actual = forecast_data_extended['Volatility']
    actual = actual[~actual.index.duplicated(keep='first')]
    actual = actual.reindex(forecast_index).rename('Actual Volatility')

    results_df = pd.concat([actual, predicted], axis=1).dropna()
    results_df.index.name = 'Date'
    if results_df.empty:
        raise ValueError("No actual volatility is available for the forecast dates.")
    return results_df


def calculate_performance_metrics(forecast_data_extended, future_dates, volatility_predictions):
    """Compute RMSE, MAPE, MAE, MSE and R² of the forecast and return them as markdown."""
    results_df = _align_actual_and_predicted(
        forecast_data_extended, future_dates, volatility_predictions)
    actual = results_df['Actual Volatility']
    predicted = results_df['Predicted Volatility']

    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    # Two trailing spaces before the newline are a markdown hard line break.
    return "  \n".join([
        f"**RMSE**: {rmse:.4f}",
        f"**MAPE**: {mape:.2%}",
        f"**MAE**: {mae:.4f}",
        f"**MSE**: {mse:.4f}",
        f"**R²**: {r2:.4f}",
    ])


def plot_residuals_plotly(results):
    """Plot the residuals of every variable of the fitted VAR model."""
    residuals = results.resid
    fig = go.Figure()
    for col in residuals.columns:
        fig.add_trace(go.Scatter(x=residuals.index, y=residuals[col],
                                 mode='lines', name=f'Residuals: {col}'))
    fig.update_layout(title='Residuals of Model', xaxis_title='Date', yaxis_title='Residuals',
                      template='plotly_white', showlegend=False)
    return fig


def calculate_metrics_and_plot_errors_plotly(forecast_data_extended, future_dates,
                                             volatility_predictions):
    """Plot predicted-vs-actual volatility and the prediction error over time."""
    results_df = _align_actual_and_predicted(
        forecast_data_extended, future_dates, volatility_predictions)
    results_df['Error'] = results_df['Actual Volatility'] - results_df['Predicted Volatility']

    fig = make_subplots(rows=2, cols=1,
                        subplot_titles=("Scatter Plot of Predicted vs Actual Volatility",
                                        "Prediction Error Over Time"))

    # Predicted against actual values
    fig.add_trace(
        go.Scatter(x=results_df['Actual Volatility'], y=results_df['Predicted Volatility'],
                   mode='markers', name='Predicted vs Actual'),
        row=1, col=1,
    )

    # Reference line y = x, spanning the range of both series
    min_vol = min(results_df['Actual Volatility'].min(), results_df['Predicted Volatility'].min())
    max_vol = max(results_df['Actual Volatility'].max(), results_df['Predicted Volatility'].max())
    fig.add_trace(
        go.Scatter(x=[min_vol, max_vol], y=[min_vol, max_vol], mode='lines',
                   name='Perfect Prediction', line=dict(dash='dash', color='red')),
        row=1, col=1,
    )

    # Error over time
    fig.add_trace(
        go.Scatter(x=results_df.index, y=results_df['Error'],
                   mode='lines+markers', name='Prediction Error'),
        row=2, col=1,
    )

    fig.update_layout(height=700, title="Model Performance: Prediction Errors",
                      template='plotly_white')
    fig.update_xaxes(title_text='Actual Volatility', row=1, col=1)
    fig.update_yaxes(title_text='Predicted Volatility', row=1, col=1)
    fig.update_xaxes(title_text='Date', row=2, col=1)
    fig.update_yaxes(title_text='Error (Actual - Predicted)', row=2, col=1)
    return fig


def extended_forecast_evaluation(tickers, rolling_window, forecast_start_date, forecast_end_date,
                                 future_dates, volatility_predictions,
                                 extra_days=DEFAULT_EXTRA_DAYS):
    """Download realised data around the forecast period and compute its volatility.

    Args:
        tickers: Mapping that contains the stock symbol under ``'stock'``.
        rolling_window: Window length (rows) of the rolling volatility.
        forecast_start_date: First forecast date.
        forecast_end_date: Last forecast date.
        future_dates: All forecast dates; each is guaranteed a row in the result
            (forward-filled from the previous available value when the market
            has no observation on that date).
        volatility_predictions: Unused; kept for interface compatibility.
        extra_days: Calendar days of realised data to include after
            ``forecast_end_date``. Previously read from a module-level global.

    Returns:
        DataFrame with a unique, sorted date index and a ``Volatility`` column.

    Raises:
        ValueError: If no data, or fewer rows than ``rolling_window``, is returned.
    """
    rolling_window = int(rolling_window)
    # Start early enough that the rolling window is warmed up before the forecast begins.
    extended_start_date = (forecast_start_date - timedelta(days=rolling_window * 3)).strftime('%Y-%m-%d')
    extended_end_date = (forecast_end_date + timedelta(days=int(extra_days))).strftime('%Y-%m-%d')

    extended_actual_data = _flatten_columns(
        yf.download(tickers['stock'], start=extended_start_date, end=extended_end_date,
                    auto_adjust=False)
    )
    if extended_actual_data.empty:
        raise ValueError(f"No extended data retrieved for {tickers['stock']}")
    if len(extended_actual_data) < rolling_window:
        raise ValueError(f"Insufficient extended data points for {tickers['stock']}. Need at least {rolling_window} days.")

    # Use log returns so the realised volatility is the same quantity the model
    # was trained to forecast.
    extended_actual_data['Returns'] = _log_returns(extended_actual_data['Adj Close'])
    extended_actual_data['Volatility'] = (
        extended_actual_data['Returns'].rolling(window=rolling_window).std()
        * np.sqrt(TRADING_DAYS_PER_YEAR)
    )

    # Reindex onto the union of actual and forecast dates. Concatenating the two
    # frames would duplicate every date present in both.
    full_index = extended_actual_data.index.union(pd.DatetimeIndex(pd.to_datetime(future_dates)))
    forecast_data_extended = extended_actual_data.reindex(full_index)
    forecast_data_extended['Volatility'] = forecast_data_extended['Volatility'].ffill()
    return forecast_data_extended.dropna(subset=['Volatility'])


# ---------------------------------------------------------------------------
# Page logic
# ---------------------------------------------------------------------------

def build_realtime_forecast(tickers, start_date, end_date, rolling_window, n_days):
    """Run the real-time pipeline and return the forecast figure."""
    data = download_data(tickers, start_date, end_date)
    data = calculate_returns_and_volatility(data, rolling_window)
    merged_data = merge_data(data)

    scaled_data, scaler = normalize_data(merged_data)
    results, optimal_lag = fit_var_model(scaled_data)
    forecast, lower_95, upper_95, lower_68, upper_68 = forecast_future_values(
        results, scaled_data, scaler, n_days, optimal_lag)

    future_dates = pd.date_range(start=end_date + timedelta(days=1), periods=n_days, freq='B')

    # Column 0 is 'Volatility' (see merge_data).
    return plot_forecast(merged_data, future_dates, forecast[:, 0],
                         lower_95[:, 0], upper_95[:, 0], lower_68[:, 0], upper_68[:, 0])


def build_performance_results(tickers, start_date, end_date, rolling_window, n_days, extra_days):
    """Run the back-test pipeline and return the figures and metrics to display."""
    adjusted_end_date = pd.to_datetime(end_date)
    # Download through the forecast horizon plus the extra comparison days.
    extended_end_date = adjusted_end_date + timedelta(days=n_days + extra_days)

    data = download_data(tickers, start_date, extended_end_date)
    data = calculate_returns_and_volatility(data, rolling_window)
    merged_data = merge_data(data)

    # Train only on data up to the chosen end date.
    merged_data_train = merged_data[merged_data.index <= adjusted_end_date]

    merged_data_diff = merged_data_train.copy()
    differenced_columns = check_stationarity_and_difference(merged_data_diff)
    merged_data_diff = merged_data_diff.dropna()

    scaled_data, scaler = normalize_data(merged_data_diff)
    results, optimal_lag = fit_var_model(scaled_data)
    forecast, lower_95, upper_95, lower_68, upper_68 = forecast_future_values(
        results, scaled_data, scaler, n_days, optimal_lag)

    # Column 0 is 'Volatility' (see merge_data).
    volatility_series = [forecast[:, 0], lower_95[:, 0], upper_95[:, 0],
                         lower_68[:, 0], upper_68[:, 0]]
    if 'Volatility' in differenced_columns:
        # The model forecast first differences; convert them back to levels
        # before plotting or scoring against realised volatility. Summing the
        # per-step bounds gives a conservative (wider than nominal) envelope
        # because it ignores cancellation between errors at different steps.
        last_level = merged_data_train['Volatility'].iloc[-1]
        volatility_series = [undifference_forecast(last_level, series)
                             for series in volatility_series]
    (volatility_predictions, lower_volatility_95, upper_volatility_95,
     lower_volatility_68, upper_volatility_68) = volatility_series

    future_dates = pd.date_range(start=adjusted_end_date + timedelta(days=1),
                                 periods=n_days, freq='B')
    forecast_data_extended = extended_forecast_evaluation(
        tickers, rolling_window, future_dates[0], future_dates[-1],
        future_dates, volatility_predictions, extra_days=extra_days)

    return {
        'forecast_fig': plot_forecast(merged_data_train, future_dates, volatility_predictions,
                                      lower_volatility_95, upper_volatility_95,
                                      lower_volatility_68, upper_volatility_68),
        'extended_forecast_fig': plot_extended_forecast(
            forecast_data_extended, future_dates, volatility_predictions),
        'performance_metrics': calculate_performance_metrics(
            forecast_data_extended, future_dates, volatility_predictions),
        'residual_fig': plot_residuals_plotly(results),
        'error_fig': calculate_metrics_and_plot_errors_plotly(
            forecast_data_extended, future_dates, volatility_predictions),
    }


def render_chart(placeholder, heading, fig):
    """Show a subheader and a chart inside ``placeholder``.

    ``st.empty()`` holds a single element, so a container is created inside it;
    otherwise the chart would replace the subheader.
    """
    with placeholder.container():
        st.subheader(heading)
        st.plotly_chart(fig)


def render_performance_results(placeholders, mp_results):
    """Render all Model Performance outputs into their placeholders."""
    render_chart(placeholders['forecast'], "Forecast with Confidence Intervals",
                 mp_results['forecast_fig'])
    render_chart(placeholders['extended'], "Extended Forecast Evaluation",
                 mp_results['extended_forecast_fig'])
    with placeholders['metrics'].container():
        st.markdown("#### Performance Metrics")
        st.markdown(mp_results['performance_metrics'])
    render_chart(placeholders['residual'], "Residuals of Model", mp_results['residual_fig'])
    render_chart(placeholders['error'], "Prediction Errors", mp_results['error_fig'])


# ---------------------------------------------------------------------------
# Streamlit script
# ---------------------------------------------------------------------------

# Set page configuration for a wide layout
st.set_page_config(layout="wide")

st.title("Volatility Forecasting Tool")
st.sidebar.title("Input Parameters")

# How-to-use instructions in an expander
with st.sidebar.expander("How to Use the App", expanded=False):
    st.markdown("""
    **Step 1**: Select the page you want to use (Real-time Predictions or Model Performance).

    **Step 2**: Enter the stock ticker symbol you wish to analyze.

    **Step 3**: Adjust the start and end dates for your analysis.

    **Step 4**: Configure additional parameters like rolling window and forecast horizon.

    **Step 5**: Click the **Run Model** button to generate the forecasts and view the results.
    """)

# Pages
page = st.sidebar.radio("Choose Page", ("Real-time Predictions", "Model Performance"))

# Common sidebar inputs within an expander (opened by default)
with st.sidebar.expander("Ticker and Date Selection", expanded=True):
    stock_ticker = st.text_input(
        "Stock Ticker", value="ASML",
        help="Enter the ticker symbol of the stock you want to analyze (e.g., AAPL for Apple Inc.).")

# The VIX and S&P 500 tickers are fixed and not exposed in the UI.
tickers = {"stock": stock_ticker, "sp500": "^GSPC", "vix": "^VIX"}

# Additional parameters within another expander (opened by default)
with st.sidebar.expander("Model Parameters", expanded=True):
    rolling_window = st.number_input(
        "Rolling Window", min_value=1, value=21,
        help="The number of days to use for calculating the rolling volatility."
    )
    n_days = st.number_input(
        "Forecast Horizon (Days)", min_value=1, value=30,
        help="The number of future days over which to forecast volatility."
    )
    if page == "Model Performance":
        extra_days = st.number_input(
            "Extra Days of Actual Data for Comparison", min_value=1, value=DEFAULT_EXTRA_DAYS,
            help="Additional days of actual future data to include for comparison with the forecast."
        )

rolling_window = int(rolling_window)
n_days = int(n_days)

if page == "Real-time Predictions":
    with st.sidebar.expander("Date Range Selection", expanded=True):
        start_date_rt = st.date_input(
            "Start Date", value=datetime(2020, 1, 1), key='start_date_rt',
            help="The start date for getting the historical data."
        )
        end_date_rt = st.date_input(
            "End Date", value=datetime.now(), key='end_date_rt', max_value=datetime.now(),
            help="The end date for getting the historical data."
        )

    # Context description in the main body
    st.markdown("""
    ### Real-time Predictions
    This app allows you to generate real-time forecasts of stock price volatility using a multivariate vector autoregression (VAR) model that incorporates external factors.
    Volatility is calculated as the rolling standard deviation of the stock's daily log returns.
    The model provides confidence intervals (68% and 95%) to represent uncertainty in the predictions.
    """)

    run_button = st.sidebar.button("Run Model", key='run_button_rt')
    plot_placeholder = st.empty()

    if run_button:
        try:
            with st.spinner("Downloading data and processing (This will take a few seconds)..."):
                forecast_fig = build_realtime_forecast(
                    tickers, start_date_rt, end_date_rt, rolling_window, n_days)
            # Keep the figure so it survives reruns triggered by other widgets.
            st.session_state['rt_results'] = {'forecast_fig': forecast_fig}
            render_chart(plot_placeholder, "Forecasted Volatility", forecast_fig)
        except Exception as e:
            st.error(f"An error occurred while running the analysis: {e}")
    elif 'rt_results' in st.session_state:
        render_chart(plot_placeholder, "Forecasted Volatility",
                     st.session_state['rt_results']['forecast_fig'])

elif page == "Model Performance":
    extra_days = int(extra_days)

    with st.sidebar.expander("Date Range Selection", expanded=True):
        start_date_mp = st.date_input(
            "Start Date", value=datetime(2020, 1, 1), key='start_date_mp',
            help="The start date for downloading historical data."
        )
        # The forecast covers n_days *business* days, so step back by business
        # days (plus the extra calendar days) to keep the whole evaluation
        # window in the past.
        today = datetime.now().date()
        max_end_date_mp = (
            (pd.Timestamp(today) - pd.offsets.BDay(n_days)).date() - timedelta(days=extra_days)
        )
        end_date_mp = st.date_input(
            "End Date", value=max_end_date_mp, max_value=max_end_date_mp, key='end_date_mp',
            help="The end date for training the model. Cannot exceed the maximum allowed date."
        )

    # Context description in the main body
    st.markdown("""
    ### Model Performance
    Here you assess how well the model forecasts volatility by comparing predicted values with actual historical (unseen) data.
    Adjust the parameters in the sidebar and click **Run Model** to assess performance.
    """)

    with st.expander("The following analyses are performed", expanded=False):
        st.markdown("""
        1. **Predicted vs Actual Volatility**:
            The app compares the predicted stock volatility with actual volatility over a given time period. Volatility is calculated as the rolling standard deviation of daily log returns. The forecasted values are plotted alongside actual values to visualize performance.

        2. **Residual Analysis**:
            Residuals represent the difference between the actual and predicted values. A plot of the residuals helps identify patterns or systematic errors in the predictions, such as under or overestimation.

        3. **Error Metrics**:
            The app calculates several error metrics to quantify the accuracy of the predictions:
            - **RMSE (Root Mean Squared Error)**: Measures the average magnitude of errors in the predictions, penalizing larger errors.
            - **MAE (Mean Absolute Error)**: Represents the average absolute difference between predicted and actual volatility.
            - **MAPE (Mean Absolute Percentage Error)**: Shows the prediction accuracy as a percentage, providing a relative measure of performance.
            - **R² (R-squared)**: Indicates how well the predicted values explain the variability in the actual volatility, with a value closer to 1 indicating better performance.

        4. **Confidence Intervals**:
            The model provides 68% and 95% confidence intervals to quantify uncertainty around the predictions. Wider intervals indicate more uncertainty, while narrower ones suggest more confidence in the forecasts.

        **Instructions:**
        - **Adjust Parameters**: Set the rolling window, forecast horizon, and extra days for comparison in the sidebar.
        - **Run the Model**: Click **Run Model** to download data, train the model, and evaluate its performance using actual market data.
        - **Evaluate Results**: The app visualizes the results with performance metrics, residual plots, and error analysis to help gauge how well the model performs.
        """)

    run_button = st.sidebar.button("Run Model", key='run_button_mp')

    # Placeholders fix the on-page order of the outputs.
    placeholders = {
        'forecast': st.empty(),
        'extended': st.empty(),
        'metrics': st.empty(),
        'residual': st.empty(),
        'error': st.empty(),
    }

    if run_button:
        try:
            with st.spinner("Downloading data and processing predictions (This will take a few seconds)..."):
                mp_results = build_performance_results(
                    tickers, start_date_mp, end_date_mp, rolling_window, n_days, extra_days)
            # Keep the results so they survive reruns triggered by other widgets.
            st.session_state['mp_results'] = mp_results
            render_performance_results(placeholders, mp_results)
        except Exception as e:
            st.error(f"An error occurred while running the analysis: {e}")
    elif 'mp_results' in st.session_state:
        render_performance_results(placeholders, st.session_state['mp_results'])

# NOTE(review): this style string contains only whitespace in the visible source,
# so it currently injects no CSS — confirm whether markup was meant to be here.
hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)