File size: 5,111 Bytes
2152c06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""Functions to clean and filter data for use in utility data visualizations"""

import pandas as pd

HOURS_PER_YEAR = 8760


def get_state_variance(df: pd.DataFrame) -> pd.DataFrame:
    """Summarizes key metrics for states to help choose state for analysis"""

    agg_df = df.groupby('Utility.State').agg({
        'Utility.Name': 'nunique',                # Want = More utilites to analyze
        'Utility.Type': 'nunique',                # Want = More utility types to compare
        'ResidentialUnitPrice': 'std',            # Want = High std for varied utilities
        'SystemLossPercentage': 'max',            # Want = Great outlier stories
        'IndustrialRevenueRatio': 'mean'          # Want = High for industrial bias
    })

    agg_df.rename(columns={
        'Utility.Name': '# Utilities',
        'Utility.Type': '# Utility Types',
        'Utility.State': 'State',
        'ResidentialUnitPrice': 'Residential Price Std. Dev.',
        'SystemLossPercentage': 'System Loss %',
        'IndustrialRevenueRatio': 'Industrial Revenue %'
    }, inplace=True)

    agg_df.columns = [f"{col[0]}_{col[1]}" if isinstance(col, tuple) else col
                      for col in agg_df.columns.values]
    agg_df.sort_values('Residential Price Std. Dev.',
                       ascending=False, inplace=True)

    return agg_df.reset_index().head(10)


def get_state_data(state: str, df: pd.DataFrame) -> pd.DataFrame:
    """Select and filter out relevant columns for analysis"""
    keep_columns = ["Utility.Name", "Utility.State", "Utility.Type",
                    "Sources.Total", "Sources.Generation", "Sources.Purchased",
                    "Sources.Other", "Retail.Residential.Revenue", "Retail.Residential.Sales",
                    "Retail.Residential.Customers", "Retail.Industrial.Revenue",
                    "Retail.Industrial.Sales", "Retail.Industrial.Customers",
                    "Uses.Retail", "Uses.Losses", "Uses.Resale",
                    "Uses.No Charge", "Uses.Consumed", "Uses.Total",
                    "Demand.Summer Peak", "Revenues.Retail"]

    return df[df["Utility.State"] == state][keep_columns].copy()


def prepare_data(df: pd.DataFrame) -> pd.DataFrame:
    """Perform calculations for key metrics and add them to the data"""
    # Residential $ per MWh
    df['ResidentialUnitPrice'] = (df['Retail.Residential.Revenue']
                                  / df['Retail.Residential.Sales']) * 1000
    df['ResidentialUnitPrice'] = df['ResidentialUnitPrice'].fillna(0)

    # Industrial $ per MWh
    df['IndustrialUnitPrice'] = df['Retail.Industrial.Revenue'] / \
        df['Retail.Industrial.Sales']
    df['IndustrialUnitPrice'] = df['IndustrialUnitPrice'].fillna(0)

    # % Dependency on industrial revenue
    df['IndustrialRevenueRatio'] = df['Retail.Industrial.Revenue'] / \
        df['Revenues.Retail'] * 100
    df['IndustrialRevenueRatio'] = df['IndustrialRevenueRatio'].fillna(0)

    # Equity Metric
    df['PriceSpread'] = df['ResidentialUnitPrice'] - df['IndustrialUnitPrice']

    # Efficiency Metric
    df['SystemLossPercentage'] = (
        df['Uses.Losses'] / df['Sources.Total']) * 100

    # Operational metric of 'stress' on system
    df['LoadFactor'] = df['Sources.Total'] / \
        (df['Demand.Summer Peak'] * HOURS_PER_YEAR)
    df['LoadFactor'] = df['LoadFactor'].apply(
        lambda load: 0 if load == float('inf') else load)

    return df


def get_customer_utilities(df: pd.DataFrame, sector="Residential") -> pd.DataFrame:
    """Filter data by customer type for use in plots"""
    if sector == "Residential":
        return df[df["Retail.Residential.Customers"] > 0]

    elif sector == "Industrial":
        return df[df["Retail.Industrial.Customers"] > 0]

    return df[(df["Retail.Residential.Customers"] > 0)
              & (df["Retail.Industrial.Customers"] > 0)]


def get_residential_load_factor(df: pd.DataFrame) -> pd.DataFrame:
    """Filter data by customer type and utilities with a load factor"""
    df = get_customer_utilities(df, "Residential")

    return df[df["LoadFactor"] > 0]


def get_residential_sys_loss(df: pd.DataFrame) -> pd.DataFrame:
    """Filter data by customer type and utilities with system loss"""
    df = get_customer_utilities(df, "Residential")

    return df[df["SystemLossPercentage"] > 0]


def get_utility_usage(utility: pd.Series, level: str = "State") -> pd.DataFrame:
    """Create data with percentages of utilty usage within the sankey plot"""

    # Convert raw values to percentages of the 'Total Sources'
    keys = [
        'Sources.Generation', 'Sources.Purchased', 'Sources.Other',
        'Uses.Retail', 'Uses.Resale', 'Uses.Losses',
        'Uses.Consumed', 'Uses.No Charge'
    ]

    named_utility = (utility[keys] / utility['Sources.Total']) * 100

    if level == "State":
        named_utility['Utility.Name'] = "State of " + \
            utility['Utility.State'][0:2]
    elif level == "US":
        named_utility['Utility.Name'] = "United States"
    else:
        named_utility['Utility.Name'] = utility['Utility.Name']

    return named_utility