"""
Functions to create and retrieve interactive plotly charts
"""
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from scipy.stats import linregress

# Columns that feed the eight Sankey links, in link order: the three source
# links first, then the five use links.
_SANKEY_LINK_COLUMNS = (
    "Sources.Generation",
    "Sources.Purchased",
    "Sources.Other",
    "Uses.Retail",
    "Uses.Resale",
    "Uses.Losses",
    "Uses.Consumed",
    "Uses.No Charge",
)

# File names written by export_plots_as_svg, in the order the figures must
# be supplied.
_EXPORT_FILE_NAMES = (
    "top_ten_state_res_variance_table.svg",
    "utility_type_strip_plot.svg",
    "key_metrics_corr_heatmap.svg",
    "rate_fairness_dual_y_scatter_plot.svg",
    "rate_disparity_dumbbell_plot.svg",
    "energy_usage_ny_sankey_chart.svg",
    "energy_usage_us_sankey_chart.svg",
)


def _sankey_link_values(record) -> list:
    """Return the eight Sankey link values of one utility record, in link order."""
    return [record[column] for column in _SANKEY_LINK_COLUMNS]


def get_state_variance_table(df: pd.DataFrame, target: int = 7) -> go.Figure:
    """Retrieve table of states with highest mean residential unit price

    Every column of ``df`` becomes a table column and every row a table row.
    The row at zero-based position ``target`` is highlighted (white text on
    a blue background); all other rows are black on white.

    Args:
        df: Data to render.
        target: Zero-based position of the row to highlight. A position
            outside the frame simply highlights nothing.

    Returns:
        A figure holding a single ``go.Table`` trace.
    """
    row_h, header_h = 35, 50

    # Size the per-row colours from the data rather than assuming ten rows.
    row_positions = range(len(df))
    fill_colors = [
        'dodgerblue' if i == target else 'white' for i in row_positions]
    font_colors = [
        'white' if i == target else 'black' for i in row_positions]

    fig = go.Figure(go.Table(
        header=dict(
            values=list(df.columns),
            fill_color='#f8f9fa',
            font=dict(size=12, family="Arial Black"),
            align='center',
            height=header_h,
        ),
        cells=dict(
            values=df.values.T,
            fill_color=[fill_colors],
            font=dict(color=[font_colors], size=12),
            height=row_h,
            align='center',
            # NOTE(review): assumes three label columns followed by three
            # numeric columns -- confirm against the caller's frame.
            format=[None, None, None, ".2f", ".2f", ".2f"],
        ),
    ))

    fig.update_layout(
        margin=dict(l=5, r=5, t=5, b=5),
        # Two extra row heights leave room for the taller header.
        height=row_h * (len(df) + 2),
        autosize=False,
    )
    return fig


def get_price_spread_strip_plot(df: pd.DataFrame) -> go.Figure:
    """
    Strip plot of Price Spread by ownership model.

    Shows which ownership types most consistently charge residential
    customers more than industrial ones. Only rows with a strictly positive
    ``PriceSpread`` are plotted.
    """
    fig = px.strip(
        df[df.PriceSpread > 0],
        x="Utility.Type",
        y="PriceSpread",
        color="Utility.Type",
        hover_name="Utility.Name",
        color_discrete_sequence=px.colors.qualitative.Prism,
        title="Rate Equity by Ownership Model: "
              "Residential Premium Over Industrial Rates",
        labels={
            "Utility.Type": "Type",
            "PriceSpread": "Residential Premium ($/MWh)",
        },
        template="plotly_white",
    )
    # The x axis already names each type, so the legend is redundant.
    fig.update_layout(showlegend=False)
    return fig


def get_key_metrics_corr_matrix(df: pd.DataFrame) -> go.Figure:
    """Correlation matrix for key analysis metrics

    Computes the pairwise correlation (``DataFrame.corr``) of four metric
    columns and renders it, rounded to two decimals, as an annotated heatmap
    with human-readable axis labels.
    """
    # Column name -> label shown on the heatmap axes.
    key_metrics = {
        'SystemLossPercentage': 'System Loss %',
        'LoadFactor': 'Load Factor',
        'IndustrialRevenueRatio': 'Industrial Revenue %',
        'PriceSpread': 'Price Spread',
    }
    axis_labels = list(key_metrics.values())
    corr_matrix = df[list(key_metrics)].corr()

    return px.imshow(
        corr_matrix.round(2),
        x=axis_labels,
        y=axis_labels,
        color_continuous_scale='mint',
        text_auto=True,
        aspect="auto",
        title='Statistical Significance: Correlation Heatmap of Key Metrics',
        labels=dict(color="Score"),
        template='plotly_white')


def add_fairness_trendline(fig: go.Figure,
                           x_data: pd.Series,
                           y_data: pd.Series,
                           row: int,
                           col: int) -> None:
    """Calculates OLS and adds centered stats inside the plot to avoid title overlap.

    Fits ``y_data`` against ``x_data`` with ``scipy.stats.linregress`` using
    only the points where both values are present, draws the fitted line
    across the observed x range, and adds an annotation box with R² and the
    p-value to the same subplot.

    Nothing is added when fewer than two valid points remain or when all
    valid x values are identical, because no regression line is defined.

    Args:
        fig: Figure created with ``make_subplots``; modified in place.
        x_data: Independent variable.
        y_data: Dependent variable, aligned with ``x_data``.
        row: One-based subplot row.
        col: One-based subplot column.
    """
    # pd.notna also copes with object dtype / None, unlike np.isnan.
    valid = pd.notna(x_data) & pd.notna(y_data)
    x_clean, y_clean = x_data[valid], y_data[valid]

    # linregress raises when every x is the same, so bail out early.
    if len(x_clean) < 2 or len(np.unique(x_clean)) < 2:
        return

    result = linregress(x_clean, y_clean)

    # A straight line only needs its two end points.
    x_range = np.array([x_clean.min(), x_clean.max()])
    y_range = result.slope * x_range + result.intercept

    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=y_range,
            mode='lines',
            line=dict(color='black', width=2, dash='dash'),
            name='Overall Trend',
            legendgroup='trendline',
            # One legend entry is enough for all subplots.
            showlegend=(row == 1 and col == 1),
            hoverinfo='skip'
        ),
        row=row, col=col)

    # Stats box. Passing row/col lets plotly resolve the axes of the target
    # subplot, so the "domain" references work for any grid position.
    fig.add_annotation(
        xref="x domain",
        yref="y domain",
        x=0.5,   # Horizontal center
        y=0.92,  # 92% of the subplot height, i.e. inside the plot
        xanchor="center",
        yanchor="top",  # Box hangs downward from the y=0.92 point
        text=f"R²: {result.rvalue**2:.3f} | p: {result.pvalue:.4e}",
        showarrow=False,
        align="center",
        # High opacity for readability
        bgcolor="rgba(255, 255, 255, 0.85)",
        bordercolor="rgba(0,0,0,0.3)",
        borderwidth=1,
        font=dict(size=10),
        row=row, col=col)


def _add_price_scatter_by_type(fig: go.Figure,
                               df: pd.DataFrame,
                               bubble_size: pd.Series,
                               color_map: dict,
                               x_column: str,
                               hovertemplate: str,
                               col: int,
                               showlegend: bool) -> None:
    """Add one residential-price marker trace per utility type to row 1, ``col``."""
    for utility_type, color in color_map.items():
        mask = df['Utility.Type'] == utility_type
        fig.add_trace(
            go.Scatter(
                x=df.loc[mask, x_column],
                y=df.loc[mask, 'ResidentialUnitPrice'],
                name=utility_type,
                hovertext=df.loc[mask, 'Utility.Name'],
                mode='markers',
                marker=dict(color=color, size=bubble_size[mask]),
                hovertemplate=hovertemplate,
                showlegend=showlegend),
            row=1, col=col)


def get_fairness_dual_y_scatter_plot(df: pd.DataFrame) -> go.Figure:
    """Get dual y-axis scatter plot of utility fairness metrics

    Builds two side-by-side panels that share the residential price y axis:
    system loss vs price on the left and load factor vs price on the right.
    Points are coloured by ``Utility.Type``, sized by the log of the
    residential customer count, and each panel gets an overall trendline
    with its regression statistics.

    The input frame is not modified.
    """
    fig = make_subplots(
        rows=1, cols=2,
        shared_yaxes=True,
        horizontal_spacing=0.05,
        subplot_titles=('System Loss vs Price', 'Load Factor vs Price'))

    # Computed locally so the caller's DataFrame does not gain a column.
    bubble_size = np.log1p(df['Retail.Residential.Customers'])

    colors = px.colors.qualitative.Prism
    types = df['Utility.Type'].unique()
    # Cycle through the palette if there are more types than colours.
    color_map = {t: colors[i % len(colors)] for i, t in enumerate(types)}

    # Plot 1: System Loss (this panel owns the legend entries)
    _add_price_scatter_by_type(
        fig, df, bubble_size, color_map,
        x_column='SystemLossPercentage',
        hovertemplate="%{hovertext}<br>Loss: %{x}%<br>Price: $%{y}",
        col=1, showlegend=True)
    add_fairness_trendline(
        fig, df['SystemLossPercentage'], df['ResidentialUnitPrice'], 1, 1)

    # Plot 2: Load Factor
    _add_price_scatter_by_type(
        fig, df, bubble_size, color_map,
        x_column='LoadFactor',
        hovertemplate="%{hovertext}<br>Load: %{x}<br>Price: $%{y}",
        col=2, showlegend=False)
    add_fairness_trendline(
        fig, df['LoadFactor'], df['ResidentialUnitPrice'], 1, 2)

    fig.update_layout(
        template='plotly_white',
        title_text='Fairness Audit: Correlation of Utility Metrics to Residential Price',
        legend_title_text="Ownership Model",
        height=600)

    fig.update_yaxes(title_text='Residential Price ($/MWh)', row=1, col=1)
    fig.update_xaxes(title_text='System Energy Loss (%)', row=1, col=1)
    fig.update_xaxes(title_text='Load Factor', row=1, col=2)

    return fig


def get_rate_disparity_dumbbell_plot(df: pd.DataFrame, top_n: int = 10) -> go.Figure:
    """Get dumbbell plot of highest disparities between industrial/residential rates

    Keeps utilities with a strictly positive ``PriceSpread``, takes the
    ``top_n`` largest spreads, and draws one horizontal dumbbell per
    utility: a grey connector between its industrial and residential rate
    markers.

    Args:
        df: Utility data.
        top_n: Number of utilities with the largest spread to show.
    """
    # Ascending sort + tail puts the most "unfair" utilities at the top of
    # the (bottom-up) categorical y axis.
    df_sorted = df[df.PriceSpread > 0].sort_values(
        'PriceSpread', ascending=True).tail(top_n)

    fig = go.Figure()

    # Add lines connecting the dots
    for _, utility in df_sorted.iterrows():
        fig.add_shape(
            type='line',
            x0=utility['IndustrialUnitPrice'],
            x1=utility['ResidentialUnitPrice'],
            y0=utility['Utility.Name'],
            y1=utility['Utility.Name'],
            line=dict(color='lightgrey', width=2))

    # Industrial dumbbells
    fig.add_trace(go.Scatter(
        x=df_sorted['IndustrialUnitPrice'],
        y=df_sorted['Utility.Name'],
        mode='markers',
        name='Industrial Rate',
        marker=dict(color='#1f77b4', size=10)))

    # Residential dumbbells
    fig.add_trace(go.Scatter(
        x=df_sorted['ResidentialUnitPrice'],
        y=df_sorted['Utility.Name'],
        mode='markers',
        name='Residential Rate',
        marker=dict(color='#d62728', size=10)))

    fig.update_layout(title="Top Rate Disparities",
                      xaxis_title="Rate ($/MWh)",
                      yaxis_title="")
    return fig


def add_utility_dropdown(fig: go.Figure, df: pd.DataFrame) -> go.Figure:
    """Post-processing function to add a utility dropdown justified right.

    Adds one dropdown entry per row of ``df``. Selecting an entry replaces
    the link values of the first trace and updates the title. The figure is
    initialised with the first row's values.

    Args:
        fig: Figure whose first trace is the Sankey diagram to drive.
        df: One row per utility.

    Returns:
        The same figure, modified in place. An empty ``df`` leaves it
        untouched because there is nothing to choose from.
    """
    if df.empty:
        return fig

    # 1. One "update" button per utility: new link values + new title.
    buttons = [
        dict(
            method="update",
            label=utility["Utility.Name"],
            args=[
                {"link.value": [_sankey_link_values(utility)]},
                {"title.text": f"Energy Flow: {utility['Utility.Name']}"},
            ],
        )
        for _, utility in df.iterrows()
    ]

    # 2. Directly assign initial values to the initial Sankey
    first_row = df.iloc[0]
    fig.data[0].link.value = _sankey_link_values(first_row)

    # 3. Apply the layout and the dropdown menu
    fig.update_layout(
        title_text=f"Energy Flow: {first_row['Utility.Name']}",
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
            x=1.0,
            xanchor="right",
            # NOTE(review): y=2 is far above the plot area in paper
            # coordinates -- confirm this placement is intended.
            y=2,
            yanchor="top",
            active=0
        )],
    )
    return fig


def get_energy_use_sankey_plot(row: pd.Series) -> go.Figure:
    """Get energy usage sankey plot

    Three source nodes flow into a central "Uses" node, which fans out into
    five use nodes. Values are displayed with one decimal and a ``%``
    suffix.

    Args:
        row: A single utility record providing the ``Sources.*`` and
            ``Uses.*`` values and ``Utility.Name``.
    """
    labels = ["Generated", "Purchased", "Other", "Uses", "Retail Sales",
              "Resale", "Losses", "Consumed", "No Charge"]

    fig = go.Figure(data=[go.Sankey(
        valueformat=".1f",
        valuesuffix="%",
        node=dict(
            label=labels,
            color=px.colors.qualitative.Prism),
        link=dict(
            # Indices into `labels`: sources 0-2 -> "Uses" (3) -> uses 4-8.
            source=[0, 1, 2, 3, 3, 3, 3, 3],
            target=[3, 3, 3, 4, 5, 6, 7, 8],
            value=_sankey_link_values(row),
        ))])

    fig.update_layout(
        title_text=f"Energy Flow: {row['Utility.Name']}",
        hovermode='x')
    return fig


def export_plots_as_svg(plots: list[go.Figure]) -> None:
    """Export plots as high-definition SVGs to the 'images' folder

    The folder is ``images`` two levels above this file's directory. It is
    created when missing, and the files are written there regardless of the
    current working directory.

    Args:
        plots: The seven figures, in this order: state variance table,
            utility type strip plot, key metrics heatmap, fairness scatter
            plot, rate disparity dumbbell plot, NY Sankey chart, US Sankey
            chart.

    Raises:
        ValueError: If the number of figures does not match the number of
            output files.
    """
    if len(plots) != len(_EXPORT_FILE_NAMES):
        raise ValueError(
            f"Expected {len(_EXPORT_FILE_NAMES)} plots, got {len(plots)}")

    script_dir = os.path.dirname(os.path.abspath(__file__))
    target_dir = os.path.normpath(
        os.path.join(script_dir, "..", "..", "images"))
    os.makedirs(target_dir, exist_ok=True)

    pio.write_images(
        fig=plots,
        file=[os.path.join(target_dir, name) for name in _EXPORT_FILE_NAMES])