import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import src.util.plot_util as plot_util
import src.util.data_util as data_util
import data.electricity as electricity

import streamlit as st
import pandas as pd

st.set_page_config(
    page_title="Utility Efficiency & Rates",
    layout="wide"
)


@st.cache_data
def load_data(state: str) -> pd.DataFrame:
    utility = electricity.get_utility()
    df = pd.json_normalize(utility)
    state_df = data_util.get_state_data(state, df)
    return data_util.prepare_data(state_df), df


with st.sidebar:
    state = st.selectbox(
        "Select State",
        options=["NY", "AK", "RI", "ME", "CA", "NJ", "CT", "NH", "MA", "AZ"],
        index=0
    )
    st.markdown("---")
    st.markdown(
        "This app explores whether operational inefficiencies "
        "— energy losses and poor load factors — correlate with "
        "higher residential electricity rates."
    )
    st.markdown(
        "[View on GitHub](https://github.com/chalseokorom/utilities-equity-efficiency-gap)")

state_df, full_df = load_data(state)

st.title("Electricity Utility Fairness Residential Rate Analysis")
st.caption(
    f"Exploring {state} utilities — "
    f"{len(state_df)} utilities across "
    f"{state_df['Utility.Type'].nunique()} ownership models"
)
st.divider()

# ── Section 1: Fairness Audit ─────────────────────────────────
st.header("Fairness Audit — Efficiency vs. Residential Price")
st.caption(
    "Do utilities with higher energy losses or lower load factors "
    "charge residential customers more per MWh?"
)

scatter_df = data_util.get_residential_sys_loss(state_df)
scatter_df = data_util.get_residential_load_factor(scatter_df).round(2)

st.plotly_chart(
    plot_util.get_fairness_dual_y_scatter_plot(scatter_df),
    use_container_width=True
)
st.divider()

# ── Section 2: Ownership Models ───────────────────────────────
st.header("Ownership Model — Price Spread")

st.plotly_chart(
    plot_util.get_price_spread_strip_plot(state_df.round(2)),
    use_container_width=True
)
st.divider()

# ── Section 3: Rate Disparity ─────────────────────────────────
st.header("Rate Disparity — Residential vs. Industrial")

n_utilities = st.slider(
    "Number of utilities to show",
    min_value=5, max_value=20, value=10, step=1
)

both_df = data_util.get_customer_utilities(state_df, sector="Both").round(2)
st.plotly_chart(
    plot_util.get_rate_disparity_dumbbell_plot(both_df, top_n=n_utilities),
    use_container_width=True
)
st.divider()

# ── Section 4: Energy Flow ────────────────────────────────────
st.header("Energy Flow — Sources & Uses")
# Row 1: State aggregate
st.subheader(f"{state} — Aggregate Energy Flow")
st.caption(
    "How all utilities in this state collectively source and distribute energy.")

numeric_sum = state_df.select_dtypes(include='number').sum()
numeric_sum['Utility.Name'] = state
state_flow = data_util.get_utility_usage(numeric_sum, level="US")

st.plotly_chart(
    plot_util.get_energy_use_sankey_plot(state_flow),
    use_container_width=True
)

st.divider()

# Row 2: Individual utility explorer
st.subheader("Individual Utility — Energy Flow")
st.caption(
    "Select a utility to see its specific energy breakdown. "
    "Compare the Losses band against the state aggregate above."
)

utility_names = sorted(state_df['Utility.Name'].dropna().unique())
selected_utility = st.selectbox("Select a utility", options=utility_names)
utility_row = state_df[state_df['Utility.Name'] == selected_utility].iloc[0]
utility_flow = data_util.get_utility_usage(utility_row, level="Utility")

st.plotly_chart(
    plot_util.get_energy_use_sankey_plot(utility_flow),
    use_container_width=True
)