github-actions[bot] committed on
Commit
9c9ca61
·
1 Parent(s): 8808c8a

Deploy from GitHub Actions

Browse files
Files changed (2) hide show
  1. analysis.py +52 -10
  2. ui/pages/sector_compare.py +9 -2
analysis.py CHANGED
@@ -2745,11 +2745,11 @@ def plot_sector_line_charts(
2745
  - DataFrame: Filtered dataframe containing the data used in the plot
2746
  - DataFrame: Contains the plotted data points with means and standard errors
2747
  """
2748
- GREY10 = "#1a1a1a"
2749
- GREY30 = "#4d4d4d"
2750
- GREY40 = "#666666"
2751
- GREY75 = "#bfbfbf"
2752
- GREY91 = "#e8e8e8"
2753
 
2754
  # 1. Data preparation
2755
  param_data = df[df["Org_Analyte_Name"] == parameter].copy()
@@ -3008,9 +3008,11 @@ def plot_sector_box_charts(
3008
  df: pd.DataFrame,
3009
  parameter: str,
3010
  color_scale: list[str] = COLOR_SCALE,
 
3011
  ) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
3012
  """
3013
- Create box plots showing the distribution of parameter values by sector and year.
 
3014
 
3015
  Parameters:
3016
  -----------
@@ -3020,6 +3022,8 @@ def plot_sector_box_charts(
3020
  Parameter to plot
3021
  color_scale : list[str]
3022
  List of colors to use for sectors
 
 
3023
 
3024
  Returns:
3025
  --------
@@ -3028,12 +3032,11 @@ def plot_sector_box_charts(
3028
  - DataFrame: Filtered dataframe containing the raw data used in the plot
3029
  - DataFrame: Contains the plotted data points: mean, median, and quartiles
3030
  """
 
 
3031
  # Define consistent colors for styling
3032
- GREY10 = "#1a1a1a"
3033
  GREY30 = "#4d4d4d"
3034
  GREY40 = "#666666"
3035
- GREY75 = "#bfbfbf"
3036
- GREY91 = "#e8e8e8"
3037
 
3038
  # Filter data for parameter
3039
  param_data = df[df["Org_Analyte_Name"] == parameter].copy()
@@ -3063,7 +3066,7 @@ def plot_sector_box_charts(
3063
  ax = plt.subplot(len(sectors), 1, idx + 1)
3064
  sector_data = param_data[param_data["Sector"] == sector]
3065
 
3066
- bp = ax.boxplot(
3067
  [
3068
  sector_data[sector_data["Reporting_Year"] == year][
3069
  "Org_Result_Value"
@@ -3084,6 +3087,45 @@ def plot_sector_box_charts(
3084
  positions=range(len(years)),
3085
  )
3086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3087
  # Set proper x-axis limits with padding
3088
  ax.set_xlim(-0.5, len(years) - 0.5)
3089
 
 
2745
  - DataFrame: Filtered dataframe containing the data used in the plot
2746
  - DataFrame: Contains the plotted data points with means and standard errors
2747
  """
2748
+ GREY10 = "#1a1a1a" # noqa: F841
2749
+ GREY30 = "#4d4d4d" # noqa: F841
2750
+ GREY40 = "#666666" # noqa: F841
2751
+ GREY75 = "#bfbfbf" # noqa: F841
2752
+ GREY91 = "#e8e8e8" # noqa: F841
2753
 
2754
  # 1. Data preparation
2755
  param_data = df[df["Org_Analyte_Name"] == parameter].copy()
 
3008
  df: pd.DataFrame,
3009
  parameter: str,
3010
  color_scale: list[str] = COLOR_SCALE,
3011
+ show_trend: bool = True, # New parameter
3012
  ) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
3013
  """
3014
+ Create box plots showing the distribution of parameter values by sector and year,
3015
+ with optional trend lines and statistics.
3016
 
3017
  Parameters:
3018
  -----------
 
3022
  Parameter to plot
3023
  color_scale : list[str]
3024
  List of colors to use for sectors
3025
+ show_trend : bool, default=True
3026
+ Whether to show trend lines and statistics
3027
 
3028
  Returns:
3029
  --------
 
3032
  - DataFrame: Filtered dataframe containing the raw data used in the plot
3033
  - DataFrame: Contains the plotted data points: mean, median, and quartiles
3034
  """
3035
+ from scipy import stats
3036
+
3037
  # Define consistent colors for styling
 
3038
  GREY30 = "#4d4d4d"
3039
  GREY40 = "#666666"
 
 
3040
 
3041
  # Filter data for parameter
3042
  param_data = df[df["Org_Analyte_Name"] == parameter].copy()
 
3066
  ax = plt.subplot(len(sectors), 1, idx + 1)
3067
  sector_data = param_data[param_data["Sector"] == sector]
3068
 
3069
+ bp = ax.boxplot( # noqa: F841
3070
  [
3071
  sector_data[sector_data["Reporting_Year"] == year][
3072
  "Org_Result_Value"
 
3087
  positions=range(len(years)),
3088
  )
3089
 
3090
+ # Only add trend line and stats if show_trend is True
3091
+ if show_trend:
3092
+ # Calculate annual means for trend line
3093
+ annual_means = [
3094
+ sector_data[sector_data["Reporting_Year"] == year][
3095
+ "Org_Result_Value"
3096
+ ].mean()
3097
+ for year in years
3098
+ ]
3099
+
3100
+ # Remove any NaN values for regression
3101
+ valid_points = [
3102
+ (x, y) for x, y in enumerate(annual_means) if not np.isnan(y)
3103
+ ]
3104
+ if valid_points:
3105
+ x_valid, y_valid = zip(*valid_points)
3106
+
3107
+ # Perform linear regression
3108
+ slope, intercept, r_value, p_value, std_err = stats.linregress(
3109
+ x_valid, y_valid
3110
+ )
3111
+
3112
+ # Plot trend line
3113
+ line_x = np.array(x_valid)
3114
+ line_y = slope * line_x + intercept
3115
+ ax.plot(line_x, line_y, "--", color="red", alpha=0.7, linewidth=1.5)
3116
+
3117
+ # Add statistics text
3118
+ stats_text = f"R² = {r_value**2:.3f}\np = {p_value:.3f}" # type: ignore
3119
+ ax.text(
3120
+ 0.02,
3121
+ 0.98,
3122
+ stats_text,
3123
+ transform=ax.transAxes,
3124
+ verticalalignment="top",
3125
+ fontsize=8,
3126
+ bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
3127
+ )
3128
+
3129
  # Set proper x-axis limits with padding
3130
  ax.set_xlim(-0.5, len(years) - 0.5)
3131
 
ui/pages/sector_compare.py CHANGED
@@ -27,7 +27,7 @@ chart_type = st.sidebar.radio(
27
  )
28
 
29
  # Only show relevant controls based on chart type
30
- if chart_type in ["Line Charts", "Heatmap"]:
31
  st.sidebar.markdown("#### Chart Specific Options")
32
  if chart_type == "Line Charts":
33
  show_sem = st.sidebar.checkbox(
@@ -43,6 +43,13 @@ elif chart_type == "Heatmap":
43
  help="Display mean values inside each cell",
44
  key="sector_compare_show_values",
45
  )
 
 
 
 
 
 
 
46
 
47
  selected_parameter = st.sidebar.selectbox(
48
  "Parameter:",
@@ -89,7 +96,7 @@ try:
89
  elif chart_type == "Box and Whisker":
90
  # Add box plots
91
  fig, param_data, plot_data = plot_sector_box_charts(
92
- filtered_raw_df, selected_parameter
93
  )
94
  st.pyplot(fig)
95
  else: # Heatmap
 
27
  )
28
 
29
  # Only show relevant controls based on chart type
30
+ if chart_type in ["Line Charts", "Heatmap", "Box and Whisker"]:
31
  st.sidebar.markdown("#### Chart Specific Options")
32
  if chart_type == "Line Charts":
33
  show_sem = st.sidebar.checkbox(
 
43
  help="Display mean values inside each cell",
44
  key="sector_compare_show_values",
45
  )
46
+ elif chart_type == "Box and Whisker":
47
+ show_trend = st.sidebar.checkbox(
48
+ "Show Trend Line",
49
+ value=True,
50
+ help="Display trend line with R² and p-values",
51
+ key="sector_compare_show_trend",
52
+ )
53
 
54
  selected_parameter = st.sidebar.selectbox(
55
  "Parameter:",
 
96
  elif chart_type == "Box and Whisker":
97
  # Add box plots
98
  fig, param_data, plot_data = plot_sector_box_charts(
99
+ filtered_raw_df, selected_parameter, show_trend=show_trend
100
  )
101
  st.pyplot(fig)
102
  else: # Heatmap