File size: 5,111 Bytes
2152c06 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | """Functions to clean and filter data for use in utility data visualizations"""
import pandas as pd
HOURS_PER_YEAR = 8760
def get_state_variance(df: pd.DataFrame) -> pd.DataFrame:
"""Summarizes key metrics for states to help choose state for analysis"""
agg_df = df.groupby('Utility.State').agg({
'Utility.Name': 'nunique', # Want = More utilites to analyze
'Utility.Type': 'nunique', # Want = More utility types to compare
'ResidentialUnitPrice': 'std', # Want = High std for varied utilities
'SystemLossPercentage': 'max', # Want = Great outlier stories
'IndustrialRevenueRatio': 'mean' # Want = High for industrial bias
})
agg_df.rename(columns={
'Utility.Name': '# Utilities',
'Utility.Type': '# Utility Types',
'Utility.State': 'State',
'ResidentialUnitPrice': 'Residential Price Std. Dev.',
'SystemLossPercentage': 'System Loss %',
'IndustrialRevenueRatio': 'Industrial Revenue %'
}, inplace=True)
agg_df.columns = [f"{col[0]}_{col[1]}" if isinstance(col, tuple) else col
for col in agg_df.columns.values]
agg_df.sort_values('Residential Price Std. Dev.',
ascending=False, inplace=True)
return agg_df.reset_index().head(10)
def get_state_data(state: str, df: pd.DataFrame) -> pd.DataFrame:
"""Select and filter out relevant columns for analysis"""
keep_columns = ["Utility.Name", "Utility.State", "Utility.Type",
"Sources.Total", "Sources.Generation", "Sources.Purchased",
"Sources.Other", "Retail.Residential.Revenue", "Retail.Residential.Sales",
"Retail.Residential.Customers", "Retail.Industrial.Revenue",
"Retail.Industrial.Sales", "Retail.Industrial.Customers",
"Uses.Retail", "Uses.Losses", "Uses.Resale",
"Uses.No Charge", "Uses.Consumed", "Uses.Total",
"Demand.Summer Peak", "Revenues.Retail"]
return df[df["Utility.State"] == state][keep_columns].copy()
def prepare_data(df: pd.DataFrame) -> pd.DataFrame:
"""Perform calculations for key metrics and add them to the data"""
# Residential $ per MWh
df['ResidentialUnitPrice'] = (df['Retail.Residential.Revenue']
/ df['Retail.Residential.Sales']) * 1000
df['ResidentialUnitPrice'] = df['ResidentialUnitPrice'].fillna(0)
# Industrial $ per MWh
df['IndustrialUnitPrice'] = df['Retail.Industrial.Revenue'] / \
df['Retail.Industrial.Sales']
df['IndustrialUnitPrice'] = df['IndustrialUnitPrice'].fillna(0)
# % Dependency on industrial revenue
df['IndustrialRevenueRatio'] = df['Retail.Industrial.Revenue'] / \
df['Revenues.Retail'] * 100
df['IndustrialRevenueRatio'] = df['IndustrialRevenueRatio'].fillna(0)
# Equity Metric
df['PriceSpread'] = df['ResidentialUnitPrice'] - df['IndustrialUnitPrice']
# Efficiency Metric
df['SystemLossPercentage'] = (
df['Uses.Losses'] / df['Sources.Total']) * 100
# Operational metric of 'stress' on system
df['LoadFactor'] = df['Sources.Total'] / \
(df['Demand.Summer Peak'] * HOURS_PER_YEAR)
df['LoadFactor'] = df['LoadFactor'].apply(
lambda load: 0 if load == float('inf') else load)
return df
def get_customer_utilities(df: pd.DataFrame, sector="Residential") -> pd.DataFrame:
"""Filter data by customer type for use in plots"""
if sector == "Residential":
return df[df["Retail.Residential.Customers"] > 0]
elif sector == "Industrial":
return df[df["Retail.Industrial.Customers"] > 0]
return df[(df["Retail.Residential.Customers"] > 0)
& (df["Retail.Industrial.Customers"] > 0)]
def get_residential_load_factor(df: pd.DataFrame) -> pd.DataFrame:
"""Filter data by customer type and utilities with a load factor"""
df = get_customer_utilities(df, "Residential")
return df[df["LoadFactor"] > 0]
def get_residential_sys_loss(df: pd.DataFrame) -> pd.DataFrame:
"""Filter data by customer type and utilities with system loss"""
df = get_customer_utilities(df, "Residential")
return df[df["SystemLossPercentage"] > 0]
def get_utility_usage(utility: pd.Series, level: str = "State") -> pd.DataFrame:
"""Create data with percentages of utilty usage within the sankey plot"""
# Convert raw values to percentages of the 'Total Sources'
keys = [
'Sources.Generation', 'Sources.Purchased', 'Sources.Other',
'Uses.Retail', 'Uses.Resale', 'Uses.Losses',
'Uses.Consumed', 'Uses.No Charge'
]
named_utility = (utility[keys] / utility['Sources.Total']) * 100
if level == "State":
named_utility['Utility.Name'] = "State of " + \
utility['Utility.State'][0:2]
elif level == "US":
named_utility['Utility.Name'] = "United States"
else:
named_utility['Utility.Name'] = utility['Utility.Name']
return named_utility
|