github-actions[bot] committed on
Commit
035a20c
·
1 Parent(s): e834c00

Deploy from GitHub Actions

Browse files
Files changed (2) hide show
  1. analysis.py +246 -0
  2. ui/pages/sector_compare.py +56 -18
analysis.py CHANGED
@@ -3001,3 +3001,249 @@ def plot_sector_line_charts(
3001
  plot_data.insert(0, "parameter", parameter)
3002
 
3003
  return fig, param_data, plot_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3001
  plot_data.insert(0, "parameter", parameter)
3002
 
3003
  return fig, param_data, plot_data
3004
+
3005
+
3006
@timer(include_params=True)
def plot_sector_box_charts(
    df: pd.DataFrame,
    parameter: str,
    color_scale: list[str] = COLOR_SCALE,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create box plots showing the distribution of parameter values by sector and year.

    One subplot is drawn per sector (stacked in a single column), each holding
    one box per reporting year. Missing result values are dropped before
    plotting and before computing statistics so the figure and the returned
    statistics describe the same samples.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. The columns read here are "Org_Analyte_Name",
        "Sector", "Activity_Start_Date_Time" (accessed through ``.dt``) and
        "Org_Result_Value".
    parameter : str
        Parameter to plot
    color_scale : list[str]
        List of colors to use for sectors. Colors are reused cyclically when
        there are more sectors than colors.

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the box plots
        - DataFrame: Filtered dataframe containing the raw data used in the plot
          (with a "Reporting_Year" column added)
        - DataFrame: Contains the plotted data points: mean, median, and quartiles
          (plus min, max and count) for every sector/year that has at least one
          non-missing value
    """
    # Muted greys used for titles, tick labels and the bottom spine
    GREY30 = "#4d4d4d"
    GREY40 = "#666666"

    # Build the row filter first and copy once, so the column assignment below
    # never writes to a slice of another frame.
    keep = df["Org_Analyte_Name"] == parameter

    # For Salinity, exclude the freshwater-lake sector. Both spellings found in
    # this module are matched so the exclusion works whichever one the data uses.
    if parameter == "Salinity":
        keep &= ~df["Sector"].isin(["Fresh Water Lakes", "Freshwater Lakes"])

    param_data = df[keep].copy()

    # Calculate year and prepare data
    param_data["Reporting_Year"] = param_data["Activity_Start_Date_Time"].dt.year
    sectors = sorted(param_data["Sector"].unique())
    years = sorted(param_data["Reporting_Year"].unique())

    # Collect the non-missing values of every sector/year once; the same
    # samples feed both the boxes and the statistics table.
    samples: dict = {}
    for sector in sectors:
        sector_rows = param_data[param_data["Sector"] == sector]
        samples[sector] = [
            sector_rows.loc[
                sector_rows["Reporting_Year"] == year, "Org_Result_Value"
            ].dropna()
            for year in years
        ]

    # Determine if log scale should be used
    use_log_scale = parameter in [
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    ]

    # Show roughly every nth tick to get 8 or fewer ticks; with 8 or fewer
    # years every tick is kept (step of 1).
    step = len(years) // 8 + 1 if len(years) > 8 else 1
    positions = list(range(len(years)))
    tick_positions = positions[::step]
    tick_labels = years[::step]

    # Single column layout, 2.5 inches per sector (at least one row tall so an
    # empty selection does not request a zero-height figure)
    fig = plt.figure(figsize=(15, 2.5 * max(len(sectors), 1)))

    # Create box plots
    for idx, sector in enumerate(sectors):
        ax = fig.add_subplot(len(sectors), 1, idx + 1)
        sector_color = color_scale[idx % len(color_scale)]

        # Tick labels are assigned explicitly further down, so no label
        # argument is passed to boxplot.
        ax.boxplot(
            [sample.to_numpy() for sample in samples[sector]],
            patch_artist=True,
            medianprops=dict(color="black"),
            flierprops=dict(
                marker="o",
                markerfacecolor=sector_color,
                alpha=0.5,
                markersize=4,
            ),
            boxprops=dict(facecolor=sector_color, alpha=0.6),
            widths=0.6,
            positions=positions,
        )

        # Set proper x-axis limits with padding
        ax.set_xlim(-0.5, len(years) - 0.5)

        ax.set_title(sector, pad=10, fontsize=10, color=GREY30)

        if use_log_scale:
            ax.set_yscale("log")

        # Customize appearance
        ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")
        for side in ("top", "right", "left"):
            ax.spines[side].set_visible(False)
        ax.spines["bottom"].set_color(GREY40)
        ax.spines["bottom"].set_linewidth(0.5)

        # Customize tick parameters
        ax.tick_params(axis="both", which="both", length=0, colors=GREY40)

        # Horizontal, possibly thinned, year labels
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, ha="center", weight=500, color=GREY40)

    # Add overall title
    fig.suptitle(
        f"{parameter} Distribution by Sector", fontsize=14, y=1.02, color=GREY30
    )

    # Adjust layout (only meaningful when at least one subplot exists)
    if sectors:
        fig.tight_layout()
        fig.subplots_adjust(hspace=0.4)

    # Create stats DataFrame to store box plot statistics
    stats_columns = [
        "Sector",
        "Reporting_Year",
        "mean",
        "median",
        "q1",
        "q3",
        "min",
        "max",
        "count",
    ]
    stats_data = []
    for sector in sectors:
        for year, sample in zip(years, samples[sector]):
            if sample.empty:
                continue
            stats_data.append(
                {
                    "Sector": sector,
                    "Reporting_Year": year,
                    "mean": sample.mean(),
                    "median": sample.median(),
                    "q1": sample.quantile(0.25),
                    "q3": sample.quantile(0.75),
                    "min": sample.min(),
                    "max": sample.max(),
                    "count": len(sample),
                }
            )

    # Fixing the column list keeps the schema stable even when no rows matched
    stats_df = pd.DataFrame(stats_data, columns=stats_columns)
    stats_df.insert(0, "parameter", parameter)

    return fig, param_data, stats_df
3152
+
3153
+
3154
@timer(include_params=True)
def plot_sector_heatmap(
    df: pd.DataFrame,
    parameter: str,
    show_values: bool = False,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create a heatmap showing annual means by sector and year.

    Rows are sectors, columns are reporting years, and each cell is the mean
    of "Org_Result_Value" for that sector/year.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. The columns read here are "Org_Analyte_Name",
        "Sector", "Org_Result_Value" and "Org_Result_Unit", plus
        "Reporting_Year" (derived from "Activity_Start_Date_Time" when the
        column is absent).
    parameter : str
        Name of the parameter to plot
    show_values : bool, default=False
        Whether to display mean values inside each cell

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the heatmap
        - DataFrame: Filtered dataframe containing the raw data used in the plot
        - DataFrame: Contains the plotted data points: mean values for each sector and year
    """
    # Build the row filter first and copy once
    keep = df["Org_Analyte_Name"] == parameter

    # For Salinity, exclude the freshwater-lake sector. Both spellings found in
    # this module are matched so the exclusion works whichever one the data uses.
    if parameter == "Salinity":
        keep &= ~df["Sector"].isin(["Fresh Water Lakes", "Freshwater Lakes"])

    param_data = df[keep].copy()

    # Derive the reporting year when the caller has not supplied it
    if "Reporting_Year" not in param_data.columns:
        param_data["Reporting_Year"] = param_data["Activity_Start_Date_Time"].dt.year

    # Calculate annual means
    plot_data = (
        param_data.groupby(["Reporting_Year", "Sector"], observed=True)[
            "Org_Result_Value"
        ]
        .mean()
        .reset_index()
        .pivot(index="Sector", columns="Reporting_Year", values="Org_Result_Value")
    )

    # 0.8 inches per sector row (at least one row tall so an empty selection
    # does not request a zero-height figure)
    fig, ax = plt.subplots(figsize=(12, max(len(plot_data), 1) * 0.8))

    if plot_data.empty:
        # Nothing to draw: show a placeholder instead of an empty image
        ax.set_axis_off()
        ax.text(
            0.5,
            0.5,
            "No data available",
            ha="center",
            va="center",
            transform=ax.transAxes,
        )
    else:
        cell_values = plot_data.to_numpy(dtype=float)

        # Create heatmap
        im = ax.imshow(cell_values, aspect="auto", cmap="YlOrRd")

        # One major tick per cell, labelled with the year / sector
        ax.set_xticks(np.arange(len(plot_data.columns)))
        ax.set_yticks(np.arange(len(plot_data.index)))
        ax.set_xticklabels(plot_data.columns)
        ax.set_yticklabels(plot_data.index)

        # Remove all spines
        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        # Remove all tick marks but keep labels
        ax.tick_params(axis="both", which="both", length=0)

        # Add small gaps between cells: minor ticks sit on the cell borders and
        # a white minor grid is drawn through them
        ax.set_xticks(np.arange(plot_data.shape[1] + 1) - 0.5, minor=True)
        ax.set_yticks(np.arange(plot_data.shape[0] + 1) - 0.5, minor=True)
        ax.grid(which="minor", color="w", linestyle="-", linewidth=2)

        # Set x-axis labels horizontal
        plt.setp(ax.get_xticklabels(), rotation=0)

        # Add value annotations if requested (cells without a mean stay blank)
        if show_values:
            for (row, col), value in np.ndenumerate(cell_values):
                if not np.isnan(value):
                    ax.text(
                        col,
                        row,
                        f"{value:.1f}",
                        ha="center",
                        va="center",
                        color="black",
                    )

        # Add colorbar at the bottom with reduced padding and no border
        cbar = fig.colorbar(im, ax=ax, orientation="horizontal", pad=0.1)
        unit = param_data["Org_Result_Unit"].iloc[0] if not param_data.empty else ""
        cbar.ax.set_xlabel(f"Mean ({unit})")
        cbar.outline.set_visible(False)  # type: ignore

    # Set title
    ax.set_title(parameter)

    fig.tight_layout()

    # Reset index to make Sector a column and add parameter column
    plot_data = plot_data.reset_index()
    plot_data.insert(0, "parameter", parameter)

    return fig, param_data, plot_data
ui/pages/sector_compare.py CHANGED
@@ -3,7 +3,11 @@ import io
3
  import pandas as pd
4
  import streamlit as st
5
 
6
- from analysis import plot_sector_line_charts
 
 
 
 
7
  from components import render_filtered_data_preview
8
  from dashboard_analytics import log_visit
9
 
@@ -11,10 +15,35 @@ st.title("Sector Comparison Charts")
11
  log_visit("Sector Comparison Charts")
12
 
13
  # Get data from session state
14
- raw_df = st.session_state.data["raw_df"]
15
  raw_df["Date"] = pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.date
16
 
17
  # Create sidebar controls
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  selected_parameter = st.sidebar.selectbox(
19
  "Parameter:",
20
  options=sorted(raw_df["Org_Analyte_Name"].unique()),
@@ -25,14 +54,6 @@ selected_parameter = st.sidebar.selectbox(
25
  key="sector_compare_parameter_select",
26
  )
27
 
28
- # Add show_sem checkbox
29
- show_sem = st.sidebar.checkbox(
30
- "Show Standard Error",
31
- value=True,
32
- help="Display standard error bands around the mean lines",
33
- key="sector_compare_show_sem",
34
- )
35
-
36
  # Add year filter slider
37
  years = sorted(raw_df["Activity_Start_Date_Time"].dt.year.unique())
38
  default_min_year = min(years)
@@ -54,15 +75,32 @@ filtered_raw_df = raw_df[
54
  ]
55
 
56
  try:
57
- fig, param_data, plot_data = plot_sector_line_charts(
58
- filtered_raw_df, selected_parameter, show_sem=show_sem
59
- )
60
- st.pyplot(fig)
 
 
61
 
62
- fig, _, _ = plot_sector_line_charts(
63
- filtered_raw_df, selected_parameter, show_sem=show_sem, panel_chart=True
64
- )
65
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Add Chart Data expander
68
  with st.expander("Chart Data"):
 
3
  import pandas as pd
4
  import streamlit as st
5
 
6
+ from analysis import (
7
+ plot_sector_box_charts,
8
+ plot_sector_heatmap,
9
+ plot_sector_line_charts,
10
+ )
11
  from components import render_filtered_data_preview
12
  from dashboard_analytics import log_visit
13
 
 
15
  log_visit("Sector Comparison Charts")
16
 
17
  # Get data from session state
18
+ raw_df = st.session_state.data["raw_df"].copy()
19
  raw_df["Date"] = pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.date
20
 
21
  # Create sidebar controls
22
chart_type = st.sidebar.radio(
    "Chart Type:",
    options=["Line Charts", "Box and Whisker", "Heatmap"],
    help="Select the type of chart to display.",
    key="sector_compare_chart_type",
)

# Bind both chart options up front (using each checkbox's initial value) so
# they are defined for every chart type, not only when their widget is shown.
show_sem = True
show_values = False

# Only show relevant controls based on chart type
if chart_type == "Line Charts":
    st.sidebar.markdown("#### Chart Specific Options")
    show_sem = st.sidebar.checkbox(
        "Show Standard Error",
        value=True,
        help="Display standard error bands around the mean lines",
        key="sector_compare_show_sem",
    )
elif chart_type == "Heatmap":
    st.sidebar.markdown("#### Chart Specific Options")
    show_values = st.sidebar.checkbox(
        "Show Means",
        value=False,
        help="Display mean values inside each cell",
        key="sector_compare_show_values",
    )
46
+
47
  selected_parameter = st.sidebar.selectbox(
48
  "Parameter:",
49
  options=sorted(raw_df["Org_Analyte_Name"].unique()),
 
54
  key="sector_compare_parameter_select",
55
  )
56
 
 
 
 
 
 
 
 
 
57
  # Add year filter slider
58
  years = sorted(raw_df["Activity_Start_Date_Time"].dt.year.unique())
59
  default_min_year = min(years)
 
75
  ]
76
 
77
  try:
78
+ if chart_type == "Line Charts":
79
+ # Line charts
80
+ fig, param_data, plot_data = plot_sector_line_charts(
81
+ filtered_raw_df, selected_parameter, show_sem=show_sem
82
+ )
83
+ st.pyplot(fig)
84
 
85
+ fig, _, _ = plot_sector_line_charts(
86
+ filtered_raw_df, selected_parameter, show_sem=show_sem, panel_chart=True
87
+ )
88
+ st.pyplot(fig)
89
+ elif chart_type == "Box and Whisker":
90
+ # Add box plots
91
+ st.subheader("Distribution by Sector and Year")
92
+ fig, param_data, plot_data = plot_sector_box_charts(
93
+ filtered_raw_df, selected_parameter
94
+ )
95
+ st.pyplot(fig)
96
+ else: # Heatmap
97
+ # Create heatmap
98
+ fig, param_data, plot_data = plot_sector_heatmap(
99
+ filtered_raw_df,
100
+ selected_parameter,
101
+ show_values=show_values if "show_values" in locals() else False,
102
+ )
103
+ st.pyplot(fig)
104
 
105
  # Add Chart Data expander
106
  with st.expander("Chart Data"):