Spaces:

EDS-lab
/

EnFoBench-PVGeneration

Sleeping

App Files Files Community

attilabalint committed on May 13, 2024

Commit

3d3e872

1 Parent(s): e11310d

initial commit

Browse files

Files changed (7) hide show

.gitignore +1 -0
app.py +82 -0
components.py +280 -0
images/energyville_logo.png +0 -0
images/ku_leuven_logo.png +0 -0
requirements.txt +2 -0
utils.py +29 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .streamlit/

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import streamlit as st
+from components import buildings_view, models_view, performance_view, computation_view
+import utils
+st.set_page_config(page_title="Pv Generation Dashboard", layout="wide")
+PAGES = [
+    "Buildings",
+    "Models",
+    "Performance",
+    "Computational Resources",
+]
+@st.cache_data(ttl=86400)
+def fetch_data():
+    return utils.get_wandb_data(
+        st.secrets["wandb_entity"],
+        "enfobench-pv-generation",
+        st.secrets["wandb_api_key"],
+        job_type="metrics",
+    )
+data = fetch_data()
+models = sorted(data["model"].unique().tolist())
+models_to_plot = set()
+model_groups: dict[str, list[str]] = {}
+for model in models:
+    group, model_name = model.split(".", maxsplit=1)
+    if group not in model_groups:
+        model_groups[group] = []
+    model_groups[group].append(model_name)
+with st.sidebar:
+    left, right = st.columns(
+        2
+    )  # Create two columns within the right column for side-by-side images
+    with left:
+        st.image("./images/ku_leuven_logo.png")
+    with right:
+        st.image("./images/energyville_logo.png")
+    view = st.selectbox("View", PAGES, index=0)
+    st.header("Models to include")
+    left, right = st.columns(2)
+    with left:
+        select_none = st.button("Select None", use_container_width=True)
+        if select_none:
+            for model in models:
+                st.session_state[model] = False
+    with right:
+        select_all = st.button("Select All", use_container_width=True)
+        if select_all:
+            for model in models:
+                st.session_state[model] = True
+    for model_group, models in model_groups.items():
+        st.text(model_group)
+        for model_name in models:
+            to_plot = st.checkbox(model_name, value=True, key=f"{model_group}.{model_name}")
+            if to_plot:
+                models_to_plot.add(f"{model_group}.{model_name}")
+st.title("EnFoBench - Electricity Demand")
+st.divider()
+if view == "Buildings":
+    buildings_view(data)
+elif view == "Models":
+    models_view(data)
+elif view == "Performance":
+    performance_view(data, models_to_plot)
+elif view == "Computational Resources":
+    computation_view(data, models_to_plot)
+else:
+    st.write("Not implemented yet")

components.py ADDED Viewed

	@@ -0,0 +1,280 @@

+import pandas as pd
+import streamlit as st
+import plotly.express as px
+def buildings_view(data):
+    buildings = (
+        data[
+            [
+                "unique_id",
+                "metadata.location_id",
+                "metadata.timezone",
+                "dataset.available_history.days",
+                "metadata.ac_capacity",
+            ]
+        ]
+        .groupby("unique_id")
+        .first()
+        .rename(
+            columns={
+                "metadata.location_id": "Location ID",
+                "metadata.timezone": "Timezone",
+                "dataset.available_history.days": "Available history (days)",
+                "metadata.ac_capacity": "Capacity (kW)",
+            }
+        )
+    )
+    st.metric("Number of buildings", len(buildings))
+    st.divider()
+    st.markdown("### Buildings")
+    st.dataframe(
+        buildings,
+        use_container_width=True,
+        column_config={
+            "Available history (days)": st.column_config.ProgressColumn(
+                "Available history (days)",
+                help="Available training data during the first prediction.",
+                format="%f",
+                min_value=0,
+                max_value=float(buildings['Available history (days)'].max()),
+            ),
+            "Capacity (kW)": st.column_config.ProgressColumn(
+                "Capacity (kW)",
+                help="Available training data during the first prediction.",
+                format="%f",
+                min_value=0,
+                max_value=float(buildings['Capacity (kW)'].max()),
+            ),
+        },
+    )
+def models_view(data):
+    models = (
+        data[
+            [
+                "model",
+                "cv_config.folds",
+                "cv_config.horizon",
+                "cv_config.step",
+                "cv_config.time",
+                "model_info.repository",
+                "model_info.tag",
+                "model_info.variate_type",
+            ]
+        ]
+        .groupby("model")
+        .first()
+        .rename(
+            columns={
+                "cv_config.folds": "CV Folds",
+                "cv_config.horizon": "CV Horizon",
+                "cv_config.step": "CV Step",
+                "cv_config.time": "CV Time",
+                "model_info.repository": "Image Repository",
+                "model_info.tag": "Image Tag",
+                "model_info.variate_type": "Variate type",
+            }
+        )
+    )
+    st.metric("Number of models", len(models))
+    st.divider()
+    st.markdown("### Models")
+    st.dataframe(models, use_container_width=True)
+    left, right = st.columns(2, gap="large")
+    with left:
+        st.markdown("#### Variate types")
+        fig = px.pie(
+            models.groupby("Variate type").size().reset_index(),
+            values=0,
+            names="Variate type",
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    with right:
+        st.markdown("#### Frameworks")
+        _df = models.copy()
+        _df["Framework"] = _df.index.str.split(".").str[0]
+        fig = px.pie(
+            _df.groupby("Framework").size().reset_index(),
+            values=0,
+            names="Framework",
+        )
+        st.plotly_chart(fig, use_container_width=True)
+def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
+    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
+        by="model", ascending=True
+    )
+    left, right = st.columns(2, gap="small")
+    with left:
+        metric = st.selectbox("Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0)
+    with right:
+        aggregation = st.selectbox(
+            "Aggregation", ["min", "mean", "median", "max", "std"], index=1
+        )
+    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
+    fig = px.box(
+        data_to_plot,
+        x=f"{metric}.{aggregation}",
+        y="model",
+        color="model",
+        points="all",
+    )
+    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
+    st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    left, right = st.columns(2, gap="large")
+    with left:
+        x_metric = st.selectbox(
+            "Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0, key="x_metric"
+        )
+        x_aggregation = st.selectbox(
+            "Aggregation",
+            ["min", "mean", "median", "max", "std"],
+            index=1,
+            key="x_aggregation",
+        )
+    with right:
+        y_metric = st.selectbox(
+            "Aggregation", ["MAE", "RMSE", "MBE", "rMAE"], index=1, key="y_metric"
+        )
+        y_aggregation = st.selectbox(
+            "Aggregation",
+            ["min", "mean", "median", "max", "std"],
+            index=1,
+            key="y_aggregation",
+        )
+    st.markdown(
+        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
+    )
+    fig = px.scatter(
+        data_to_plot,
+        x=f"{x_metric}.{x_aggregation}",
+        y=f"{y_metric}.{y_aggregation}",
+        color="model",
+    )
+    fig.update_layout(height=600)
+    st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    left, right = st.columns(2, gap="small")
+    with left:
+        metric = st.selectbox(
+            "Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0, key="table_metric"
+        )
+    with right:
+        aggregation = st.selectbox(
+            "Aggregation across folds",
+            ["min", "mean", "median", "max", "std"],
+            index=1,
+            key="table_aggregation",
+        )
+    metrics_table = data_to_plot.groupby(["model"]).agg(aggregation, numeric_only=True)[
+        [
+            f"{metric}.min",
+            f"{metric}.mean",
+            f"{metric}.median",
+            f"{metric}.max",
+            f"{metric}.std",
+        ]
+    ]
+    def custom_table(styler):
+        styler.background_gradient(cmap="seismic", axis=0)
+        styler.format(precision=2)
+        # center text and increase font size
+        styler.map(lambda x: "text-align: center; font-size: 14px;")
+        return styler
+    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
+    styled_table = metrics_table.style.pipe(custom_table)
+    st.dataframe(styled_table, use_container_width=True)
+    metrics_table = (
+        data_to_plot.groupby(["model", "unique_id"])
+        .apply(aggregation, numeric_only=True)
+        .reset_index()
+        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
+    )
+    def custom_table(styler):
+        styler.background_gradient(cmap="seismic", axis=None)
+        styler.format(precision=2)
+        # center text and increase font size
+        styler.map(lambda x: "text-align: center; font-size: 14px;")
+        return styler
+    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
+    styled_table = metrics_table.style.pipe(custom_table)
+    st.dataframe(styled_table, use_container_width=True)
+def computation_view(data, models_to_plot: set[str]):
+    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
+        by="model", ascending=True
+    )
+    st.markdown("#### Computational Resources")
+    fig = px.parallel_coordinates(
+        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
+        dimensions=[
+            "model",
+            "resource_usage.CPU",
+            "resource_usage.memory",
+            "MAE.mean",
+            "RMSE.mean",
+            "MBE.mean",
+            "rMAE.mean",
+        ],
+        color="rMAE.mean",
+        color_continuous_scale=px.colors.diverging.Portland,
+    )
+    st.plotly_chart(fig, use_container_width=True)
+    st.divider()
+    left, center, right = st.columns(3, gap="small")
+    with left:
+        metric = st.selectbox("Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0)
+    with center:
+        aggregation_per_building = st.selectbox(
+            "Aggregation per building", ["min", "mean", "median", "max", "std"], index=1
+        )
+    with right:
+        aggregation_per_model = st.selectbox(
+            "Aggregation per model", ["min", "mean", "median", "max", "std"], index=1
+        )
+    st.markdown(
+        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
+    )
+    aggregated_data = (
+        data_to_plot.groupby("model")
+        .agg(aggregation_per_building, numeric_only=True)
+        .reset_index()
+    )
+    fig = px.scatter(
+        aggregated_data,
+        x="resource_usage.CPU",
+        y=f"{metric}.{aggregation_per_model}",
+        color="model",
+        log_x=True,
+    )
+    fig.update_layout(height=600)
+    st.plotly_chart(fig, use_container_width=True)

images/energyville_logo.png ADDED Viewed

images/ku_leuven_logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ wandb==0.17.0
2	+ plotly==5.20.0

utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import pandas as pd
+import wandb
+def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd.DataFrame:
+    api = wandb.Api(api_key=api_key)
+    # Project is specified by <entity/project-name>
+    filter_dict = {"jobType": job_type}
+    runs = api.runs(f"{entity}/{project}", filters=filter_dict)
+    summary_list, config_list, name_list = [], [], []
+    for run in runs:
+        # .summary contains the output keys/values for metrics like accuracy.
+        #  We call ._json_dict to omit large files
+        summary_list.append(run.summary._json_dict)
+        # .config contains the hyperparameters.
+        #  We remove special values that start with _.
+        config_list.append({k: v for k, v in run.config.items()})
+        # .name is the human-readable name of the run.
+        name_list.append(run.name)
+    summary_df = pd.json_normalize(summary_list, max_level=1)
+    config_df = pd.json_normalize(config_list, max_level=2)
+    runs_df = pd.concat([summary_df, config_df], axis=1)
+    runs_df.index = name_list
+    return runs_df