github-actions[bot] committed on
Commit ·
9c9ca61
1
Parent(s): 8808c8a
Deploy from GitHub Actions
Browse files
- analysis.py +52 -10
- ui/pages/sector_compare.py +9 -2
analysis.py
CHANGED
|
@@ -2745,11 +2745,11 @@ def plot_sector_line_charts(
|
|
| 2745 |
- DataFrame: Filtered dataframe containing the data used in the plot
|
| 2746 |
- DataFrame: Contains the plotted data points with means and standard errors
|
| 2747 |
"""
|
| 2748 |
-
GREY10 = "#1a1a1a"
|
| 2749 |
-
GREY30 = "#4d4d4d"
|
| 2750 |
-
GREY40 = "#666666"
|
| 2751 |
-
GREY75 = "#bfbfbf"
|
| 2752 |
-
GREY91 = "#e8e8e8"
|
| 2753 |
|
| 2754 |
# 1. Data preparation
|
| 2755 |
param_data = df[df["Org_Analyte_Name"] == parameter].copy()
|
|
@@ -3008,9 +3008,11 @@ def plot_sector_box_charts(
|
|
| 3008 |
df: pd.DataFrame,
|
| 3009 |
parameter: str,
|
| 3010 |
color_scale: list[str] = COLOR_SCALE,
|
|
|
|
| 3011 |
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
|
| 3012 |
"""
|
| 3013 |
-
Create box plots showing the distribution of parameter values by sector and year
|
|
|
|
| 3014 |
|
| 3015 |
Parameters:
|
| 3016 |
-----------
|
|
@@ -3020,6 +3022,8 @@ def plot_sector_box_charts(
|
|
| 3020 |
Parameter to plot
|
| 3021 |
color_scale : list[str]
|
| 3022 |
List of colors to use for sectors
|
|
|
|
|
|
|
| 3023 |
|
| 3024 |
Returns:
|
| 3025 |
--------
|
|
@@ -3028,12 +3032,11 @@ def plot_sector_box_charts(
|
|
| 3028 |
- DataFrame: Filtered dataframe containing the raw data used in the plot
|
| 3029 |
- DataFrame: Contains the plotted data points: mean, median, and quartiles
|
| 3030 |
"""
|
|
|
|
|
|
|
| 3031 |
# Define consistent colors for styling
|
| 3032 |
-
GREY10 = "#1a1a1a"
|
| 3033 |
GREY30 = "#4d4d4d"
|
| 3034 |
GREY40 = "#666666"
|
| 3035 |
-
GREY75 = "#bfbfbf"
|
| 3036 |
-
GREY91 = "#e8e8e8"
|
| 3037 |
|
| 3038 |
# Filter data for parameter
|
| 3039 |
param_data = df[df["Org_Analyte_Name"] == parameter].copy()
|
|
@@ -3063,7 +3066,7 @@ def plot_sector_box_charts(
|
|
| 3063 |
ax = plt.subplot(len(sectors), 1, idx + 1)
|
| 3064 |
sector_data = param_data[param_data["Sector"] == sector]
|
| 3065 |
|
| 3066 |
-
bp = ax.boxplot(
|
| 3067 |
[
|
| 3068 |
sector_data[sector_data["Reporting_Year"] == year][
|
| 3069 |
"Org_Result_Value"
|
|
@@ -3084,6 +3087,45 @@ def plot_sector_box_charts(
|
|
| 3084 |
positions=range(len(years)),
|
| 3085 |
)
|
| 3086 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3087 |
# Set proper x-axis limits with padding
|
| 3088 |
ax.set_xlim(-0.5, len(years) - 0.5)
|
| 3089 |
|
|
|
|
| 2745 |
- DataFrame: Filtered dataframe containing the data used in the plot
|
| 2746 |
- DataFrame: Contains the plotted data points with means and standard errors
|
| 2747 |
"""
|
| 2748 |
+
GREY10 = "#1a1a1a" # noqa: F841
|
| 2749 |
+
GREY30 = "#4d4d4d" # noqa: F841
|
| 2750 |
+
GREY40 = "#666666" # noqa: F841
|
| 2751 |
+
GREY75 = "#bfbfbf" # noqa: F841
|
| 2752 |
+
GREY91 = "#e8e8e8" # noqa: F841
|
| 2753 |
|
| 2754 |
# 1. Data preparation
|
| 2755 |
param_data = df[df["Org_Analyte_Name"] == parameter].copy()
|
|
|
|
| 3008 |
df: pd.DataFrame,
|
| 3009 |
parameter: str,
|
| 3010 |
color_scale: list[str] = COLOR_SCALE,
|
| 3011 |
+
show_trend: bool = True, # New parameter
|
| 3012 |
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
|
| 3013 |
"""
|
| 3014 |
+
Create box plots showing the distribution of parameter values by sector and year,
|
| 3015 |
+
with optional trend lines and statistics.
|
| 3016 |
|
| 3017 |
Parameters:
|
| 3018 |
-----------
|
|
|
|
| 3022 |
Parameter to plot
|
| 3023 |
color_scale : list[str]
|
| 3024 |
List of colors to use for sectors
|
| 3025 |
+
show_trend : bool, default=True
|
| 3026 |
+
Whether to show trend lines and statistics
|
| 3027 |
|
| 3028 |
Returns:
|
| 3029 |
--------
|
|
|
|
| 3032 |
- DataFrame: Filtered dataframe containing the raw data used in the plot
|
| 3033 |
- DataFrame: Contains the plotted data points: mean, median, and quartiles
|
| 3034 |
"""
|
| 3035 |
+
from scipy import stats
|
| 3036 |
+
|
| 3037 |
# Define consistent colors for styling
|
|
|
|
| 3038 |
GREY30 = "#4d4d4d"
|
| 3039 |
GREY40 = "#666666"
|
|
|
|
|
|
|
| 3040 |
|
| 3041 |
# Filter data for parameter
|
| 3042 |
param_data = df[df["Org_Analyte_Name"] == parameter].copy()
|
|
|
|
| 3066 |
ax = plt.subplot(len(sectors), 1, idx + 1)
|
| 3067 |
sector_data = param_data[param_data["Sector"] == sector]
|
| 3068 |
|
| 3069 |
+
bp = ax.boxplot( # noqa: F841
|
| 3070 |
[
|
| 3071 |
sector_data[sector_data["Reporting_Year"] == year][
|
| 3072 |
"Org_Result_Value"
|
|
|
|
| 3087 |
positions=range(len(years)),
|
| 3088 |
)
|
| 3089 |
|
| 3090 |
+
# Only add trend line and stats if show_trend is True
|
| 3091 |
+
if show_trend:
|
| 3092 |
+
# Calculate annual means for trend line
|
| 3093 |
+
annual_means = [
|
| 3094 |
+
sector_data[sector_data["Reporting_Year"] == year][
|
| 3095 |
+
"Org_Result_Value"
|
| 3096 |
+
].mean()
|
| 3097 |
+
for year in years
|
| 3098 |
+
]
|
| 3099 |
+
|
| 3100 |
+
# Remove any NaN values for regression
|
| 3101 |
+
valid_points = [
|
| 3102 |
+
(x, y) for x, y in enumerate(annual_means) if not np.isnan(y)
|
| 3103 |
+
]
|
| 3104 |
+
if valid_points:
|
| 3105 |
+
x_valid, y_valid = zip(*valid_points)
|
| 3106 |
+
|
| 3107 |
+
# Perform linear regression
|
| 3108 |
+
slope, intercept, r_value, p_value, std_err = stats.linregress(
|
| 3109 |
+
x_valid, y_valid
|
| 3110 |
+
)
|
| 3111 |
+
|
| 3112 |
+
# Plot trend line
|
| 3113 |
+
line_x = np.array(x_valid)
|
| 3114 |
+
line_y = slope * line_x + intercept
|
| 3115 |
+
ax.plot(line_x, line_y, "--", color="red", alpha=0.7, linewidth=1.5)
|
| 3116 |
+
|
| 3117 |
+
# Add statistics text
|
| 3118 |
+
stats_text = f"R² = {r_value**2:.3f}\np = {p_value:.3f}" # type: ignore
|
| 3119 |
+
ax.text(
|
| 3120 |
+
0.02,
|
| 3121 |
+
0.98,
|
| 3122 |
+
stats_text,
|
| 3123 |
+
transform=ax.transAxes,
|
| 3124 |
+
verticalalignment="top",
|
| 3125 |
+
fontsize=8,
|
| 3126 |
+
bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
|
| 3127 |
+
)
|
| 3128 |
+
|
| 3129 |
# Set proper x-axis limits with padding
|
| 3130 |
ax.set_xlim(-0.5, len(years) - 0.5)
|
| 3131 |
|
ui/pages/sector_compare.py
CHANGED
|
@@ -27,7 +27,7 @@ chart_type = st.sidebar.radio(
|
|
| 27 |
)
|
| 28 |
|
| 29 |
# Only show relevant controls based on chart type
|
| 30 |
-
if chart_type in ["Line Charts", "Heatmap"]:
|
| 31 |
st.sidebar.markdown("#### Chart Specific Options")
|
| 32 |
if chart_type == "Line Charts":
|
| 33 |
show_sem = st.sidebar.checkbox(
|
|
@@ -43,6 +43,13 @@ elif chart_type == "Heatmap":
|
|
| 43 |
help="Display mean values inside each cell",
|
| 44 |
key="sector_compare_show_values",
|
| 45 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
selected_parameter = st.sidebar.selectbox(
|
| 48 |
"Parameter:",
|
|
@@ -89,7 +96,7 @@ try:
|
|
| 89 |
elif chart_type == "Box and Whisker":
|
| 90 |
# Add box plots
|
| 91 |
fig, param_data, plot_data = plot_sector_box_charts(
|
| 92 |
-
filtered_raw_df, selected_parameter
|
| 93 |
)
|
| 94 |
st.pyplot(fig)
|
| 95 |
else: # Heatmap
|
|
|
|
| 27 |
)
|
| 28 |
|
| 29 |
# Only show relevant controls based on chart type
|
| 30 |
+
if chart_type in ["Line Charts", "Heatmap", "Box and Whisker"]:
|
| 31 |
st.sidebar.markdown("#### Chart Specific Options")
|
| 32 |
if chart_type == "Line Charts":
|
| 33 |
show_sem = st.sidebar.checkbox(
|
|
|
|
| 43 |
help="Display mean values inside each cell",
|
| 44 |
key="sector_compare_show_values",
|
| 45 |
)
|
| 46 |
+
elif chart_type == "Box and Whisker":
|
| 47 |
+
show_trend = st.sidebar.checkbox(
|
| 48 |
+
"Show Trend Line",
|
| 49 |
+
value=True,
|
| 50 |
+
help="Display trend line with R² and p-values",
|
| 51 |
+
key="sector_compare_show_trend",
|
| 52 |
+
)
|
| 53 |
|
| 54 |
selected_parameter = st.sidebar.selectbox(
|
| 55 |
"Parameter:",
|
|
|
|
| 96 |
elif chart_type == "Box and Whisker":
|
| 97 |
# Add box plots
|
| 98 |
fig, param_data, plot_data = plot_sector_box_charts(
|
| 99 |
+
filtered_raw_df, selected_parameter, show_trend=show_trend
|
| 100 |
)
|
| 101 |
st.pyplot(fig)
|
| 102 |
else: # Heatmap
|