# Spaces status: Runtime error
| import requests | |
| import pandas as pd | |
| from io import StringIO | |
| import streamlit as st | |
| import os | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import plotly.colors as pc | |
| import numpy as np | |
| from sklearn.metrics import mean_squared_error | |
| from statsmodels.tsa.stattools import acf | |
| from statsmodels.graphics.tsaplots import plot_acf | |
| import matplotlib.pyplot as plt | |
| from datetime import datetime | |
def get_current_time():
    """Return the current hour and whether the 10th minute has been reached.

    Returns:
        tuple: ``(hour, after_10_min)`` where ``hour`` is the ``hour`` field
        of ``datetime.now()`` and ``after_10_min`` is ``True`` when the
        current minute is 10 or later.
    """
    timestamp = datetime.now()
    return timestamp.hour, timestamp.minute >= 10
## GET ALL FILES FROM GITHUB
def load_GitHub(github_token, file_name, hour=None, after_10_min=None):
    """Download one CSV file from the Transparency_Data GitHub repository.

    Args:
        github_token: Personal access token sent in the ``Authorization``
            header of the request.
        file_name: Name of the CSV file on the repository's ``main`` branch.
        hour: Not used by this function. It is optional so that both the
            two-argument and the four-argument call styles found in this
            file work.
        after_10_min: Not used by this function; optional for the same reason.

    Returns:
        The parsed ``DataFrame``. When the file has a ``Date`` column it is
        converted to datetimes and used as the index. ``None`` is returned
        when the request fails or the response status is not 200.
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}
    try:
        # A timeout keeps a stalled connection from blocking the app forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status: log and
        # return None so the caller decides how to proceed.
        print(f"Failed to download {file_name}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None

    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)  # Set 'Date' column as the index
    return df
def load_forecast(github_token, hour, after_10_min):
    """Download the hourly prediction files ``Predictions_0h.csv`` .. ``Predictions_23h.csv``.

    Args:
        github_token: Token forwarded to ``load_GitHub``.
        hour: Accepted for interface compatibility. It does not select which
            files are fetched: all 24 hourly files are always requested.
        after_10_min: Forwarded unchanged to ``load_GitHub``.

    Returns:
        dict mapping each file name to its ``DataFrame``. Files for which
        ``load_GitHub`` returned ``None`` are left out.
    """
    predictions_dict = {}
    # A distinct loop name avoids shadowing the ``hour`` parameter.
    for file_hour in range(24):
        file_name = f'Predictions_{file_hour}h.csv'
        df = load_GitHub(github_token, file_name, file_hour, after_10_min)
        if df is not None:
            predictions_dict[file_name] = df
    return predictions_dict
def convert_European_time(data, time_zone):
    """Re-express the index of ``data`` as naive timestamps in ``time_zone``.

    The index is parsed with ``utc=True``, converted to ``time_zone`` and
    finally stripped of its timezone information. ``data`` is modified in
    place and also returned.

    Args:
        data: Object whose ``index`` holds the timestamps to convert.
        time_zone: Timezone name understood by pandas, e.g. ``'Europe/Brussels'``.

    Returns:
        The same ``data`` object with its index replaced.
    """
    utc_index = pd.to_datetime(data.index, utc=True)
    data.index = utc_index.tz_convert(time_zone).tz_localize(None)
    return data
# Ordered (regex, replacement) pairs shared by both helpers below.  The
# ``Forecast_elia`` pattern must run before the shorter LightGBM pattern,
# because the shorter pattern also matches the start of the longer names.
_MODEL_NAME_REPLACEMENTS = (
    (r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia', '.LightGBM_with_Forecast_elia'),
    (r'\.LightGBMModel\.\dD\.TimeCov\.Temp', '.LightGBM'),
    (r'\.Naive\.\dD', '.Naive'),
)


def _simplify_labels(labels):
    """Apply every model-name replacement, in order, to a pandas string Index."""
    for pattern, simplified in _MODEL_NAME_REPLACEMENTS:
        labels = labels.str.replace(pattern, simplified, regex=True)
    return labels


def simplify_model_names(df):
    """Shorten the model names embedded in the column labels of ``df``.

    The columns are replaced in place and ``df`` is returned. Labels that
    match none of the patterns are left unchanged.
    """
    df.columns = _simplify_labels(df.columns)
    return df


def simplify_model_names_in_index(df):
    """Shorten the model names embedded in the index labels of ``df``.

    The index is replaced in place and ``df`` is returned. Labels that
    match none of the patterns are left unchanged.
    """
    df.index = _simplify_labels(df.index)
    return df
# The GitHub token is read from the GITHUB_TOKEN environment variable rather
# than being stored in the source file.
# NOTE(review): a token used to be hard-coded on this line; it has been
# exposed together with the code and should be revoked.
github_token = os.environ.get('GITHUB_TOKEN')
if not github_token:
    # Nothing below can work without the data, so stop the script run here
    # instead of failing later on undefined names.
    st.error("GitHub Personal Access Token not found. Set the GITHUB_TOKEN environment variable to proceed.")
    st.stop()

hour, after_10_min = get_current_time()
forecast_dict = load_forecast(github_token, hour, after_10_min)
# ``hour`` and ``after_10_min`` are passed explicitly so the calls match the
# four-parameter signature of load_GitHub.
historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv', hour, after_10_min)
Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)

# load_GitHub returns None on failure; report that instead of crashing in
# the timezone conversion below.
failed_downloads = [
    csv_name
    for csv_name, frame in (
        ('BE_Elia_Entsoe_UTC.csv', Data_BE),
        ('FR_Entsoe_UTC.csv', Data_FR),
        ('NL_Entsoe_UTC.csv', Data_NL),
        ('DE_Entsoe_UTC.csv', Data_DE),
    )
    if frame is None
]
if failed_downloads:
    st.error(f"Could not download: {', '.join(failed_downloads)}")
    st.stop()

Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')
# Main layout of the app: title on the left, two logos on the right.
col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
with col1:
    st.title("Transparency++")
with col2:
    # One placeholder above the logos (the original created it twice and
    # left the first one unused).
    upper_space = col2.empty()
    col2_1, col2_2 = st.columns(2)  # Two columns within the right column for side-by-side images
    with col2_1:
        st.image("KU_Leuven_logo.png", width=100)  # Adjust the path and width as needed
    with col2_2:
        st.image("energyville_logo.png", width=100)
    # NOTE(review): the markup below is whitespace-only in the visible
    # source; presumably spacer markup was lost -- confirm against the
    # deployed file.
    upper_space.markdown("""


""", unsafe_allow_html=True)
# Display name -> country code used to pick the dataset.
countries = {
    'Netherlands': 'NL',
    'Germany': 'DE',
    'France': 'FR',
    'Belgium': 'BE',
}

st.sidebar.header('Filters')
st.sidebar.subheader("Select Country")
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
selected_country = st.sidebar.selectbox('Select Country', list(countries))

st.sidebar.subheader("Select Date Range ")
st.sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
date_range = st.sidebar.date_input(
    "Select Date Range for Metrics Calculation:",
    value=(pd.to_datetime("2024-01-01"), pd.Timestamp('today')),
)

# Ensure the date range provides two dates
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date = pd.Timestamp(date_range[0])
# The picker returns dates, which convert to midnight.  Move the end bound to
# the last second of the chosen day so that label slicing with
# ``start_date:end_date`` keeps the whole end day instead of only 00:00.
end_date = pd.Timestamp(date_range[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)

st.sidebar.subheader("Section")
st.sidebar.caption("Select the type of information you want to explore.")
# Sidebar with radio buttons for different sections; 'Forecasts' is preselected.
section = st.sidebar.radio('', ['Data', 'Forecasts', 'Insights'], index=1)
country_code = countries[selected_country]
# For every country code: the loaded frame, the weather columns exposed to the
# plots, and the alias -> source-column assignments (in assignment order).
country_setup = {
    'BE': (
        Data_BE,
        ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
        {
            'Temperature': 'temperature_2m_8',
            'Wind Speed Offshore': 'wind_speed_100m_4',
            'Wind Speed Onshore': 'wind_speed_100m_8',
        },
    ),
    'DE': (
        Data_DE,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
    'NL': (
        Data_NL,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
    'FR': (
        Data_FR,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
}
if country_code in country_setup:
    data, weather_columns, alias_sources = country_setup[country_code]
    # Add the display-named copies of the weather columns to the selected frame.
    for alias_name, source_name in alias_sources.items():
        data[alias_name] = data[source_name]
def add_feature(df2, df_main):
    """Combine ``df_main`` with the additional rows/columns of ``df2``.

    When ``df2`` starts exactly one hour after ``df_main`` ends, the rows of
    ``df2`` that lie after the last timestamp of ``df_main`` are appended.
    Otherwise the frames are merged with ``df_main.combine_first(df2)``, so
    values of ``df_main`` take priority and gaps are filled from ``df2``.

    Args:
        df2: Frame providing the additional data.
        df_main: Frame whose values take priority.

    Returns:
        A new combined ``DataFrame``; neither input is modified.
    """
    last_date_df1 = df_main.index.max()
    first_date_df2 = df2.index.min()
    if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
        return pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
    # Only pay for the merge when the append shortcut does not apply.
    return df_main.combine_first(df2)
| #data.index = data.index.tz_localize('UTC') | |
# Columns are listed in (actual, forecast) pairs; the loops that consume this
# list step through it two entries at a time.
forecast_columns = [
    'Load_entsoe', 'Load_forecast_entsoe',
    'Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe',
    'Solar_entsoe', 'Solar_forecast_entsoe',
]
# Section 1: Data -- data-quality table for the selected country.
if section == 'Data':
    st.header("Data")
    st.write("""
This section allows you to explore and upload your datasets.
You can visualize raw data, clean it, and prepare it for analysis.
""")
    st.header('Data Quality')
    st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')

    # The last 28 rows are left out of the quality statistics.
    # NOTE(review): presumably these are rows that are not observed yet -- confirm.
    data_quality = data.iloc[:-28]

    # Only report columns that exist for this country; selecting a missing
    # column would raise a KeyError.
    available_columns = [col for col in forecast_columns if col in data_quality.columns]

    # Report % of missing values
    missing_values = (data_quality[available_columns].isna().mean() * 100).round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }
    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country
    capacities = installed_capacities[country_code]
    # Variable name (column name without its suffix) -> capacity entry that
    # bounds it.  Load has no entry: only negative values count as extreme.
    capacity_key_by_variable = {
        'Solar': 'Solar',
        'Wind_onshore': 'Wind Onshore',
        'Wind_offshore': 'Wind Offshore',
    }
    extreme_values = {}
    for col in available_columns:
        series = data_quality[col]
        variable = col.replace('_forecast_entsoe', '').replace('_entsoe', '')
        is_extreme = series < 0
        capacity_key = capacity_key_by_variable.get(variable)
        if capacity_key is not None:
            is_extreme = is_extreme | (series > capacities[capacity_key])
        extreme_values[col] = is_extreme.mean() * 100
    extreme_values = pd.Series(extreme_values, dtype=float).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })
    st.markdown(
        """
<style>
.dataframe {font-size: 45px !important;}
</style>
""",
        unsafe_allow_html=True
    )
    st.dataframe(metrics_df)
    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
# Section 2: Forecasts -- time series of the last week.
# Written as an independent ``if``: ``section`` holds exactly one value, so
# the section branches remain mutually exclusive.
if section == 'Forecasts':
    st.header('Forecast Quality')

    # Time series for last 1 week
    st.subheader('Time Series: Last 1 Week')
    if len(data.index):
        last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
    else:
        # No rows at all: keep the empty frame instead of failing on index[-1].
        last_week = data
    st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform over the last week of available data.')

    # ``forecast_columns`` lists (actual, forecast) column pairs back to back.
    for actual_col, forecast_col in zip(forecast_columns[::2], forecast_columns[1::2]):
        # Skip variables that this country's dataset does not provide.
        if actual_col not in data.columns or forecast_col not in data.columns:
            continue
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
        fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
        fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
        st.plotly_chart(fig)
def plot_category(df_dict, category_prefix, title, real_values_df=None):
    """Plot every hourly forecast of one variable together with the observations.

    Args:
        df_dict: Mapping of ``Predictions_<hour>h.csv`` file names to frames
            of forecasts; the hour is parsed from the file name.
        category_prefix: Prefix of the forecast columns to plot; the
            observations are read from the ``<category_prefix>_entsoe`` column.
        title: Figure title.
        real_values_df: Frame holding the observations. Defaults to the
            module-level ``Data_BE`` frame when omitted.

    Returns:
        A ``plotly.graph_objects.Figure``. The first trace of each model is
        drawn in the model's base colour and carries the legend entry. Later
        traces of the same model are nearly transparent, with a shade that
        depends on the hour.
    """
    fig = go.Figure()
    # Define base colors for each model
    model_colors = {
        'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
        'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
        'Naive': '#ff7f0e',  # Orange
    }
    # To keep track of which model has been added to the legend
    legend_added = dict.fromkeys(model_colors, False)
    last_index = None

    for file_name, df in df_dict.items():
        # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
        hour = int(file_name.split('_')[1].replace('h.csv', ''))
        last_index = df.index
        for column in df.columns:
            if not column.startswith(category_prefix):
                continue
            # Identify the model type; columns of unknown models are skipped
            # so that no colour from a previous column is reused by mistake.
            if 'LightGBMModel' in column:
                if 'Forecast_elia' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
                elif 'TimeCov' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp'
                else:
                    continue
            elif 'Naive' in column:
                model_key = 'Naive'
            else:
                continue

            # Model name = column label without the leading variable part.
            model_name = '.'.join(column.split('.')[1:])
            base_color = model_colors[model_key]
            # Scale the base colour by the hour so the hourly runs differ in shade.
            scale_factor = 0.3 + (hour / 40)
            red, green, blue = (int(channel * scale_factor) for channel in pc.hex_to_rgb(base_color))
            line_color = f'rgba({red}, {green}, {blue}, 0.1)'  # Transparent color for lines

            # Only the first trace of each model gets a legend entry.
            show_legend = not legend_added[model_key]
            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[column],
                mode='lines',
                name=model_name if show_legend else None,
                line=dict(color=base_color if show_legend else line_color),
                showlegend=show_legend,
                legendgroup=model_key,  # Grouping for consistent legend color
            ))
            legend_added[model_key] = True

    # Add the observations over the time span of the last forecast frame.
    if real_values_df is None:
        real_values_df = Data_BE
    actual_column = f'{category_prefix}_entsoe'
    if last_index is not None and actual_column in real_values_df.columns:
        # Boolean selection tolerates forecast timestamps that have no
        # observation row, which a plain label lookup would reject.
        actual_values = real_values_df.loc[real_values_df.index.isin(last_index), actual_column]
        if actual_values.notna().any():
            fig.add_trace(go.Scatter(
                x=actual_values.index,
                y=actual_values,
                mode='lines',
                name=f'Actual {category_prefix}',
                line=dict(color='black', width=2),  # Black line for real values
                showlegend=True,  # Always show this in the legend
            ))

    # Title anchored to the top-left corner, horizontal legend centred above the plot.
    fig.update_layout(
        title=dict(
            text=title,
            x=0,
            y=1.00,
            xanchor='left',
            yanchor='top',
        ),
        xaxis_title='Date',
        yaxis_title='Value',
        legend=dict(
            orientation="h",  # Horizontal legend
            yanchor="bottom",  # y refers to the bottom edge of the legend box
            y=1,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
def calculate_mae(y_true, y_pred):
    """Return the mean of the absolute differences between ``y_true`` and ``y_pred``.

    Both arguments must support element-wise subtraction (for example NumPy
    arrays or pandas Series).
    """
    absolute_errors = np.abs(y_true - y_pred)
    return np.mean(absolute_errors)
def _select_model_columns(columns, column_prefix, model_key, model_keys):
    """Return the columns of ``model_key`` that do not belong to a more specific model.

    A key such as ``'LightGBM'`` is also a substring of
    ``'LightGBM_with_Forecast_elia'``. Columns containing a longer key that
    itself contains ``model_key`` are excluded, so each model is matched only
    to its own forecast column.
    """
    more_specific_keys = [key for key in model_keys if key != model_key and model_key in key]
    return [
        col for col in columns
        if col.startswith(column_prefix)
        and model_key in col
        and not any(key in col for key in more_specific_keys)
    ]


def _hourly_mae_ratios(df_dict, category_prefix, model_key, model_keys, real_values_df, log_missing=False):
    """Return ``(hours, ratios)`` of model MAE over ENTSO-E forecast MAE per hourly file.

    Hours are skipped when the prediction file is missing, has no column for
    the model, shares no timestamps with the observations, or yields a ratio
    that is not finite (for example a zero or NaN ENTSO-E MAE).
    """
    actual_values = real_values_df[f'{category_prefix}_entsoe'].dropna()
    entsoe_values = real_values_df[f'{category_prefix}_forecast_entsoe']
    hours_with_data = []
    mae_ratios = []
    for hour in range(24):
        df = df_dict.get(f'Predictions_{hour}h.csv')
        if df is None:
            continue
        if isinstance(df.index, pd.DatetimeIndex):
            # Drop the first and the last calendar day of the file.
            days = df.index.normalize()
            df = df[(days != days.min()) & (days != days.max())]

        matching_columns = _select_model_columns(
            df.columns, f"{category_prefix}_entsoe", model_key, model_keys)
        if not matching_columns:
            if log_missing:
                print(f"No matching columns for {model_key} at hour {hour}. Skipping...")
            continue
        model_predictions = df[matching_columns[0]]

        # Align the series on the timestamps they share.
        common_indices = model_predictions.index.intersection(actual_values.index)
        if common_indices.empty:
            continue
        aligned_actual_values = actual_values.loc[common_indices]
        model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
        entsoe_mae = calculate_mae(aligned_actual_values, entsoe_values.loc[common_indices])
        if not np.isfinite(entsoe_mae) or entsoe_mae == 0:
            continue
        mae_ratio = model_mae / entsoe_mae
        if not np.isfinite(mae_ratio):
            continue
        mae_ratios.append(mae_ratio)
        hours_with_data.append(hour)
    return hours_with_data, mae_ratios


def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
    """Plot, per forecasting hour, each model's MAE relative to the ENTSO-E forecast MAE.

    Args:
        df_dict: Mapping of ``Predictions_<hour>h.csv`` file names to forecast frames.
        category_prefix: Variable name; the ``<prefix>_entsoe`` and
            ``<prefix>_forecast_entsoe`` columns of ``real_values_df`` are used.
        title: Accepted for interface compatibility; the figure title is
            built from ``category_prefix`` instead.
        real_values_df: Frame with the observations and the ENTSO-E forecast.

    Returns:
        A ``plotly.graph_objects.Figure`` with one line-and-marker trace per
        model that has data.
    """
    # Load models are keyed with '7D', all other categories with '1D'.
    horizon = '7D' if category_prefix == 'Load' else '1D'
    model_colors = {
        f'LightGBMModel.{horizon}.TimeCov.Temp.Forecast_elia': '#1F77B4',  # Blue
        f'LightGBMModel.{horizon}.TimeCov.Temp': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    fig = go.Figure()
    for model_key, base_color in model_colors.items():
        hours_with_data, mae_ratios = _hourly_mae_ratios(
            df_dict, category_prefix, model_key, list(model_colors), real_values_df)
        if mae_ratios:  # Only plot if there's data
            fig.add_trace(go.Scatter(
                x=hours_with_data,  # The hours where we have data
                y=mae_ratios,
                mode='markers+lines',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))
    fig.update_layout(
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
        xaxis_title='Hour of Forecast',
        yaxis_title='MAE Ratio (Model / entsoe)',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig


def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
    """Polar ("clock") variant of ``plot_mae_comparison`` for simplified model names.

    Each hour ``h`` is placed at ``h * 15`` degrees. Parameters are the same
    as for ``plot_mae_comparison``; ``title`` is likewise not used.

    Returns:
        A ``plotly.graph_objects.Figure`` with one closed polar trace per
        model that has data.
    """
    hours = list(range(24))
    model_colors = {
        'LightGBM_with_Forecast_elia': '#1F77B4',  # Blue
        'LightGBM': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    fig = go.Figure()
    largest_ratio = None
    for model_key, base_color in model_colors.items():
        hours_with_data, mae_ratios = _hourly_mae_ratios(
            df_dict, category_prefix, model_key, list(model_colors), real_values_df,
            log_missing=True)
        if not mae_ratios:
            print(f"No data to plot for {model_key}.")  # Debugging print
            continue
        model_max = max(mae_ratios)
        largest_ratio = model_max if largest_ratio is None else max(largest_ratio, model_max)
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point so the outline closes on itself, also
            # when the first hour with data is not hour 0.
            r=mae_ratios + [mae_ratios[0]],
            theta=[h * 15 for h in hours_with_data] + [hours_with_data[0] * 15],
            mode='lines+markers',
            name=model_key,
            line=dict(color=base_color),
            marker=dict(color=base_color, size=8),
        ))

    # Size the radial axis for all plotted models, not just the last one.
    if largest_ratio is None:
        radial_range = [0, 1.0]
    else:
        radial_range = [0, max(largest_ratio, 1.0) * 1.1]
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=radial_range),
            angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours),
        ),
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
        showlegend=True,
    )
    return fig
# Section 2 (continued): error distribution, accuracy metrics and ACF plots.
# Written as an independent ``if``: ``section`` holds exactly one value, so
# the section branches remain mutually exclusive.
if section == 'Forecasts':
    # ``forecast_columns`` lists (actual, forecast) column pairs back to back.
    column_pairs = list(zip(forecast_columns[::2], forecast_columns[1::2]))

    # Scatter plots for error distribution
    st.subheader('Error Distribution')
    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
    # NOTE(review): the text above mentions the selected date range, but the
    # plots use every row after 2023 -- confirm which one is intended.
    data_2024 = data[data.index.year > 2023]
    for actual_col, forecast_col in column_pairs:
        if actual_col not in data_2024.columns or forecast_col not in data_2024.columns:
            continue
        fig = px.scatter(
            x=data_2024[actual_col],
            y=data_2024[forecast_col],
            labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'},
        )
        fig.update_layout(title=f'Error Distribution for {forecast_col}')
        st.plotly_chart(fig)

    st.subheader('Accuracy Metrics (Sorted by rMAE):')
    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # From here on only the selected date range is used.
    data = data.loc[start_date:end_date]
    row_labels = {
        'Load_entsoe': 'Load',
        'Solar_entsoe': 'Solar',
        'Wind_onshore_entsoe': 'Wind Onshore',
        'Wind_offshore_entsoe': 'Wind Offshore',
    }
    accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
    for actual_col, forecast_col in column_pairs:
        if actual_col not in data.columns or forecast_col not in data.columns:
            continue
        obs = data[actual_col]
        pred = data[forecast_col]
        mae = np.mean(np.abs(pred - obs))
        # Persistence baseline: the observation 168 rows earlier for load,
        # 24 rows earlier otherwise.
        # NOTE(review): this is weekly/daily persistence only if the data is hourly -- confirm.
        persistence = obs.shift(168 if 'Load' in actual_col else 24)
        persistence_mae = np.mean(np.abs(obs - persistence))
        # The ratio uses the unrounded MAE; a zero baseline error gives NaN
        # instead of a division by zero.
        rmae = mae / persistence_mae if persistence_mae else np.nan
        accuracy_metrics.loc[row_labels[actual_col]] = [round(mae, 2), round(rmae, 2)]

    accuracy_metrics.dropna(how='all', inplace=True)
    # Sort by rMAE (second column)
    accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
    accuracy_metrics = accuracy_metrics.astype(float).round(4)

    col1, col2 = st.columns([3, 2])
    with col1:
        st.dataframe(accuracy_metrics)
    with col2:
        st.markdown("""
<style>
.big-font {
font-size: 20px;
font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
        st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")

    st.subheader('ACF plots of Errors')
    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
    for actual_col, forecast_col in column_pairs:
        if actual_col not in data.columns or forecast_col not in data.columns:
            continue
        error = (data[forecast_col] - data[actual_col]).dropna()
        # Nothing to correlate when fewer than two error values remain.
        if len(error) < 2:
            continue
        st.write(f"**ACF of Errors for {actual_col}**")
        fig, ax = plt.subplots(figsize=(10, 5))
        plot_acf(error, ax=ax)
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps it alive across script reruns.
        plt.close(fig)
# Section 3: Insights -- correlation and weather scatter plots.
# Written as an independent ``if``: ``section`` holds exactly one value, so
# the section branches remain mutually exclusive.
if section == 'Insights':
    st.header("Insights")
    st.write("""
This section provides insights derived from the data and forecasts.
You can visualize trends, anomalies, and other important findings.
""")

    # Scatter plots for correlation between wind, solar, and load
    st.subheader('Correlation between Wind, Solar, and Load')
    st.write('The below scatter plots are made for checking whether there exists a correlation between all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
    combinations = [
        ('Solar_entsoe', 'Load_entsoe'),
        ('Wind_onshore_entsoe', 'Load_entsoe'),
        ('Wind_offshore_entsoe', 'Load_entsoe'),
        ('Solar_entsoe', 'Wind_onshore_entsoe'),
        ('Solar_entsoe', 'Wind_offshore_entsoe'),
    ]
    for x_col, y_col in combinations:
        if x_col not in data.columns or y_col not in data.columns:
            continue
        # For solar combinations only rows with positive solar values are used.
        plot_source = data[data['Solar_entsoe'] > 0] if 'Solar_entsoe' in x_col else data
        x_values = plot_source[x_col]
        y_values = plot_source[y_col]
        corr_coef = x_values.corr(y_values)
        fig = px.scatter(
            x=x_values,
            y=y_values,
            labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
            title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})',
            color_discrete_sequence=['grey'],
        )
        st.plotly_chart(fig)

    st.subheader('Weather vs. Generation/Demand')
    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and the generation/demand data from ENTSO-E.')
    measured_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']
    for weather_col in weather_columns:
        for actual_col in measured_columns:
            if not (weather_col in data.columns and actual_col in data.columns):
                continue
            clean_label = actual_col.replace('_entsoe', '')
            # Temperature plots get their own axis labels and colour.
            if weather_col == 'Temperature':
                scatter_options = dict(
                    labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'},
                    color_discrete_sequence=['orange'],
                )
            else:
                scatter_options = dict(
                    labels={'x': f'{weather_col} (km/h)', 'y': clean_label},
                )
            fig = px.scatter(x=data[weather_col], y=data[actual_col], **scatter_options)
            fig.update_layout(title=f'{weather_col} vs {actual_col}')
            st.plotly_chart(fig)