| import math |
| import sys |
| import textwrap |
| from pathlib import Path |
|
|
| import altair as alt |
| import contextily as ctx |
| import geopandas as gpd |
| import matplotlib.dates as mdates |
| import matplotlib.pyplot as plt |
| import numpy as np |
| import pandas as pd |
| import plotly.graph_objects as go |
| import scipy.stats as stats |
| import seaborn as sns |
| import streamlit as st |
| from matplotlib.colors import LinearSegmentedColormap |
| from matplotlib.figure import Figure |
| from osgeo import gdal |
| from plotly.subplots import make_subplots |
|
|
| from utils.data_loading import timer |
|
|
# Palette of 24 hex color strings.
# NOTE(review): nothing in the visible part of this module reads COLOR_SCALE;
# presumably it is used further down or imported elsewhere - confirm before removing.
COLOR_SCALE = [
    "#6D3E91",
    "#C05917",
    "#58AC8C",
    "#286BBB",
    "#883039",
    "#BC8E5A",
    "#00295B",
    "#C15065",
    "#18470F",
    "#9A5129",
    "#E56E5A",
    "#A2559C",
    "#38AABA",
    "#578145",
    "#970046",
    "#00847E",
    "#B13507",
    "#4C6A9C",
    "#CF0A66",
    "#00875E",
    "#B16214",
    "#8C4569",
    "#3B8E1D",
    "#D73C50",
]
|
|
|
|
@st.cache_data
@timer(include_params=True)
def plot_trends_by_station(
    df: pd.DataFrame, analyte_names: list[str], sample_position: str, figsize=(15, 12)
) -> Figure:
    """
    Create subplots of analyte trends for the given dataframe and analytes.

    Each analyte gets one subplot (two per row) showing per-year box plots,
    the annual mean and, when more than one year is present, a linear
    regression of the annual means annotated with its R² and p-value.
    The sample count of every year is printed above its box.

    Parameters:
    -----------
    df : pandas DataFrame
        The filtered dataframe containing data for a specific station and position.
        Reads the columns Station_Number, Name, Org_Analyte_Name,
        Org_Result_Value and Org_Result_Unit, plus Reporting_Year when it
        exists (otherwise the year of Activity_Start_Date_Time is used).
    analyte_names : list[str]
        List of analyte names to plot
    sample_position : str
        Sample position shown in the figure title; "All" is displayed as
        "Surface and Bottom"
    figsize : tuple
        Figure size in inches (width, height)

    Returns:
    --------
    Figure
        Matplotlib figure with one subplot per analyte
    """
    # Analytes drawn on a logarithmic y axis.
    log_scale_analytes = (
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    )

    # Two subplots per row. Keep at least one row so an empty analyte list
    # produces a title-only figure instead of an error from plt.subplots().
    n_rows = max(1, (len(analyte_names) + 1) // 2)
    fig, axes = plt.subplots(n_rows, 2, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    station_number = df["Station_Number"].iloc[0]
    station_name = df["Name"].iloc[0]
    sample_position_label = (
        "Surface and Bottom" if sample_position == "All" else sample_position
    )

    for idx, analyte_name in enumerate(analyte_names):
        ax = axes[idx]
        data = (
            df[df["Org_Analyte_Name"] == analyte_name]
            .assign(
                Year=lambda frame: (
                    frame["Reporting_Year"]
                    if "Reporting_Year" in frame.columns
                    else frame["Activity_Start_Date_Time"].dt.year
                )
            )
            .dropna(subset=["Org_Result_Value"])
        )

        if data.empty:
            ax.text(
                0.5,
                0.5,
                f"No data available for {analyte_name}",
                ha="center",
                va="center",
            )
            continue

        if analyte_name in log_scale_analytes:
            ax.set_yscale("log")
            # Plain numbers instead of 10^k tick labels.
            ax.yaxis.set_major_formatter(plt.ScalarFormatter())

        # One box per year, positioned at the year value on the x axis.
        groups = data.groupby("Year", observed=True)
        positions = np.array(list(groups.groups.keys()))
        group_data = [group["Org_Result_Value"] for _, group in groups]

        ax.boxplot(
            group_data,
            positions=positions,
            widths=0.6,
            patch_artist=True,
            boxprops=dict(facecolor="lightblue", color="blue", alpha=0.5),
            medianprops=dict(color="blue"),
            whiskerprops=dict(color="blue"),
            capprops=dict(color="blue"),
            flierprops=dict(color="blue", markeredgecolor="blue", alpha=0.5),
        )

        # Annual means, reusing the grouping built for the box plots.
        yearly_means = groups["Org_Result_Value"].mean()
        mean_years = yearly_means.index.to_numpy()
        mean_values = yearly_means.to_numpy()

        ax.plot(
            mean_years, mean_values, "bo-", linewidth=1, markersize=4, label="Annual Mean"
        )

        # A regression needs at least two annual means.
        if len(mean_years) > 1:
            slope, intercept, r_value, p_value, _ = stats.linregress(
                mean_years, mean_values
            )
            trend_line = slope * mean_years + intercept
            ax.plot(
                mean_years, trend_line, "r--", alpha=0.8, linewidth=1, label="Trend"
            )

            stats_text = f"R²={r_value**2:.3f}\np={p_value:.3f}"
            ax.text(
                0.02,
                0.98,
                stats_text,
                transform=ax.transAxes,
                verticalalignment="top",
                bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
                parse_math=False,
            )

        ax.set_title(f"{analyte_name}", pad=15)
        ax.set_xlabel("Year")
        analyte_unit = data["Org_Result_Unit"].iloc[0]
        if analyte_name == "Depth, Secchi Disk Depth":
            y_label = f"Depth ({analyte_unit})"
        elif analyte_name == "pH":
            y_label = None
        elif analyte_name.startswith("Dissolved"):
            y_label = f"DO ({analyte_unit})"
        elif analyte_name.startswith("Fecal Coliform"):
            y_label = f"Fecal Coliform ({analyte_unit})"
        else:
            y_label = f"{analyte_name} ({analyte_unit})"

        ax.set_ylabel(y_label)
        ax.grid(True, alpha=0.3)

        # Sample count per year, written just above the top of the axes.
        y_top = ax.get_ylim()[1]
        for year, group in groups:
            ax.text(
                year,
                y_top,
                f"n={len(group)}",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    # Remove the unused subplot(s) left over in the two-column grid.
    for idx in range(len(analyte_names), len(axes)):
        fig.delaxes(axes[idx])

    fig.suptitle(
        f"Water Quality Trends for {station_number} - {station_name} - {sample_position_label}",
        fontsize=14,
        y=0.95,
    )

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    return fig
|
|
|
|
@timer(include_params=True)
def altair_plot_sector_trends(
    df: pd.DataFrame, analyte_names: list[str]
) -> alt.VConcatChart:
    """
    Create plots of mean annual analyte trends by sector using Altair.

    For every analyte the mean and standard error of Org_Result_Value are
    computed per (Reporting_Year, Sector) and drawn as a line with points
    and a mean ± SE band.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Reads the columns Sector, Org_Analyte_Name,
        Reporting_Year, Org_Result_Value and Org_Result_Unit.
    analyte_names : list[str]
        List of analytes to plot

    Returns:
    --------
    alt.VConcatChart
        Vertically concatenated Altair charts for each analyte
    """
    # The matplotlib sector plot in this module spells the sector
    # "Freshwater Lakes" while this function used "Fresh Water Lakes";
    # exclude both so the salinity filter works whichever spelling the data uses.
    freshwater_sectors = ("Fresh Water Lakes", "Freshwater Lakes")

    # Analytes drawn on a logarithmic y axis.
    log_scale_analytes = (
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    )

    # One shared color scale so a sector keeps its color across all charts.
    color_scale = alt.Scale(
        domain=df["Sector"].unique().tolist(),
        range=[
            "#1f77b4",
            "#ff7f0e",
            "#2ca02c",
            "#d62728",
            "#9467bd",
            "#8c564b",
            "#e377c2",
            "#7f7f7f",
        ],
    )

    charts = []
    for analyte_name in analyte_names:
        analyte_data = df[df["Org_Analyte_Name"] == analyte_name]

        if analyte_name == "Salinity":
            analyte_data = analyte_data[
                ~analyte_data["Sector"].isin(freshwater_sectors)
            ]

        # Mean and standard error per year and sector.
        processed_data = (
            analyte_data.groupby(["Reporting_Year", "Sector"], observed=True)[
                "Org_Result_Value"
            ]
            .agg(["mean", "sem"])
            .reset_index()
            .rename(columns={"mean": "Mean", "sem": "SE"})
        )

        # Bounds of the mean ± SE band.
        processed_data["Upper"] = processed_data["Mean"] + processed_data["SE"]
        processed_data["Lower"] = processed_data["Mean"] - processed_data["SE"]

        # The unit of the first row labels the y axis.
        unit = analyte_data["Org_Result_Unit"].iloc[0] if not analyte_data.empty else ""

        use_log_scale = analyte_name in log_scale_analytes

        base = alt.Chart(processed_data).encode(
            x=alt.X("Reporting_Year:O", axis=alt.Axis(title=None)),
            color=alt.Color("Sector:N", scale=color_scale),
            tooltip=[
                alt.Tooltip("Reporting_Year:O"),
                alt.Tooltip("Sector:N"),
                alt.Tooltip("Mean:Q", format=".2f"),
                alt.Tooltip("SE:Q", format=".2f"),
            ],
        )

        lines = base.mark_line().encode(
            y=alt.Y(
                "Mean:Q",
                title=f"({unit})",
                scale=alt.Scale(type="log" if use_log_scale else "linear"),
            )
        )

        points = base.mark_point(size=50).encode(y=alt.Y("Mean:Q"))

        area = base.mark_area(opacity=0.15).encode(
            y=alt.Y("Lower:Q"), y2=alt.Y2("Upper:Q")
        )

        chart = (
            (area + lines + points)
            .properties(
                width=600,
                height=300,
                title=alt.TitleParams(text=analyte_name, anchor="middle", fontSize=14),
            )
            .interactive()
        )

        charts.append(chart)

    final_chart = alt.vconcat(*charts).configure(
        view={"strokeWidth": 0}, axis={"grid": True, "gridOpacity": 0.2}
    )

    return final_chart
|
|
|
|
def plotly_plot_analyte_trends(df: pd.DataFrame, analyte_names: list[str]) -> go.Figure:
    """
    Create subplots of analyte trends using Plotly for the given dataframe and analytes.

    Each analyte gets one subplot (two per row) with per-year box plots,
    the annual mean, a linear trend of the annual means (when more than
    one year is present) annotated with R² and p, and per-year sample counts.

    Parameters:
    -----------
    df : pandas DataFrame
        The filtered dataframe containing data for a specific station and position.
        Reads the columns Station_Number, Sample_Position, Org_Analyte_Name,
        Org_Result_Value, Org_Result_Unit and Activity_Start_Date_Time.
    analyte_names : list[str]
        List of analyte names to plot

    Returns:
    --------
    go.Figure
        Plotly figure containing the subplots
    """
    # Same log-scale analytes as the matplotlib and Altair trend plots in
    # this module (this function previously listed only the first two).
    log_scale_analytes = (
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    )

    # Two subplots per row; make_subplots rejects rows=0, so keep at least one.
    n_rows = max(1, (len(analyte_names) + 1) // 2)

    fig = make_subplots(
        rows=n_rows,
        cols=2,
        subplot_titles=analyte_names,
        vertical_spacing=0.12,
        horizontal_spacing=0.1,
    )

    station_number = df["Station_Number"].iloc[0]
    sample_position = df["Sample_Position"].iloc[0]

    for idx, analyte_name in enumerate(analyte_names):
        row = idx // 2 + 1
        col = idx % 2 + 1

        data = (
            df[df["Org_Analyte_Name"] == analyte_name]
            .assign(Year=lambda frame: frame["Activity_Start_Date_Time"].dt.year)
            .dropna(subset=["Org_Result_Value"])
        )

        if data.empty:
            # Domain references place the message at the centre of the
            # subplot regardless of the (empty) axis ranges. row/col select
            # the subplot, so no explicit axis ids are needed.
            fig.add_annotation(
                text=f"No data available for {analyte_name}",
                xref="x domain",
                yref="y domain",
                x=0.5,
                y=0.5,
                showarrow=False,
                row=row,
                col=col,
            )
            continue

        log_scale = analyte_name in log_scale_analytes

        groups = data.groupby("Year", observed=True)
        years = list(groups.groups.keys())

        fig.add_trace(
            go.Box(
                x=data["Year"],
                y=data["Org_Result_Value"],
                name="Box Plot",
                boxpoints="outliers",
                line=dict(color="blue"),
                fillcolor="lightblue",
                showlegend=False,
            ),
            row=row,
            col=col,
        )

        # Annual means, reusing the grouping above.
        yearly_means = groups["Org_Result_Value"].mean()

        fig.add_trace(
            go.Scatter(
                x=years,
                y=yearly_means.values,
                mode="lines+markers",
                name="Annual Mean",
                line=dict(color="blue"),
                showlegend=False,
            ),
            row=row,
            col=col,
        )

        # Annotations sit at the largest observed value. Plotly expects
        # annotation coordinates on a log axis as log10 of the data value.
        y_max = data["Org_Result_Value"].max()
        annotation_y = np.log10(y_max) if log_scale and y_max > 0 else y_max

        # A regression needs at least two annual means.
        if len(years) > 1:
            X = np.array(years)
            y = yearly_means.values
            slope, intercept, r_value, p_value, _ = stats.linregress(X, y)
            trend_line = slope * X + intercept

            fig.add_trace(
                go.Scatter(
                    x=years,
                    y=trend_line,
                    mode="lines",
                    name="Trend",
                    line=dict(color="red", dash="dash"),
                    showlegend=False,
                ),
                row=row,
                col=col,
            )

            stats_text = f"R² = {r_value**2:.3f}<br>p = {p_value:.3f}"
            fig.add_annotation(
                text=stats_text,
                x=min(years),
                y=annotation_y,
                showarrow=False,
                bgcolor="white",
                bordercolor="black",
                borderwidth=1,
                row=row,
                col=col,
            )

        # Sample count per year.
        for year, group in groups:
            fig.add_annotation(
                text=f"n={len(group)}",
                x=year,
                y=annotation_y,
                showarrow=False,
                font=dict(size=8),
                row=row,
                col=col,
            )

        if log_scale:
            fig.update_yaxes(type="log", row=row, col=col)

        fig.update_xaxes(title_text="Year", row=row, col=col)
        fig.update_yaxes(
            title_text=f'Value ({data["Org_Result_Unit"].iloc[0]})', row=row, col=col
        )

    fig.update_layout(
        title=f"Water Quality Trends<br>Station {station_number} - {sample_position}",
        title_x=0.5,
        showlegend=False,
        height=300 * n_rows + 100,
        width=1000,
        template="plotly_white",
    )

    return fig
|
|
|
|
@timer(include_params=True)
def plot_sector_trends(
    df: pd.DataFrame, analyte_names: list[str], base_height: float = 4
) -> Figure:
    """
    Create plots of mean annual analyte trends by sector.

    One subplot per analyte, stacked vertically. Each sector is drawn as a
    line of annual means with a shaded mean ± standard-error band.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Reads the columns Sector, Org_Analyte_Name,
        Reporting_Year, Org_Result_Value and Org_Result_Unit.
    analyte_names : list[str]
        List of analytes to plot
    base_height : float
        Height per subplot in inches (default=4)

    Returns:
    --------
    Figure
        Matplotlib figure with one subplot per analyte
    """
    # The Altair sector plot in this module spells the sector
    # "Fresh Water Lakes" while this function used "Freshwater Lakes";
    # exclude both so the salinity filter works whichever spelling the data uses.
    freshwater_sectors = ("Fresh Water Lakes", "Freshwater Lakes")

    # Analytes drawn on a logarithmic y axis.
    log_scale_analytes = (
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    )

    custom_colors = [
        "#1f77b4",
        "#ff7f0e",
        "#2ca02c",
        "#d62728",
        "#9467bd",
        "#8c564b",
        "#e377c2",
        "#7f7f7f",
    ]

    # plt.subplots rejects zero rows, so always create at least one and
    # delete whatever is left unused after the loop.
    n_plots = len(analyte_names)
    n_rows = max(n_plots, 1)
    fig, axes = plt.subplots(
        n_rows, 1, figsize=(15, base_height * n_rows), squeeze=False
    )
    axes = axes.ravel()

    sectors = df["Sector"].unique()

    for ax, analyte_name in zip(axes, analyte_names):
        analyte_data = df[df["Org_Analyte_Name"] == analyte_name]

        if analyte_name == "Salinity":
            analyte_data = analyte_data[
                ~analyte_data["Sector"].isin(freshwater_sectors)
            ]

        for position, sector in enumerate(sectors):
            # Cycle through the palette so sectors beyond the eighth are
            # still drawn (zip() with the color list silently dropped them).
            color = custom_colors[position % len(custom_colors)]
            sector_data = (
                analyte_data[analyte_data["Sector"] == sector]
                .groupby("Reporting_Year", observed=True)["Org_Result_Value"]
                .agg(["mean", "sem"])
                .reset_index()
            )

            if sector_data.empty:
                continue

            ax.plot(
                sector_data["Reporting_Year"],
                sector_data["mean"],
                "-o",
                color=color,
                label=sector,
                markersize=4,
                linewidth=2,
            )

            # Mean ± standard error band.
            ax.fill_between(
                sector_data["Reporting_Year"],
                sector_data["mean"] - sector_data["sem"],
                sector_data["mean"] + sector_data["sem"],
                color=color,
                alpha=0.15,
            )

        # One tick per reporting year that has data for this analyte.
        years = sorted(analyte_data["Reporting_Year"].unique())
        ax.set_xticks(years)
        ax.set_xticklabels(years)

        ax.set_title(analyte_name, pad=10, fontsize=11, fontweight="normal")
        ax.set_xlabel("")

        if not analyte_data.empty:
            analyte_unit = analyte_data["Org_Result_Unit"].iloc[0]
            ax.set_ylabel(f"({analyte_unit})", fontsize=10)

        ax.grid(True, alpha=0.2, linestyle="--")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Only build a legend when something was actually plotted.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(
                bbox_to_anchor=(1.05, 1),
                loc="upper left",
                borderaxespad=0.0,
                frameon=True,
                fancybox=False,
                shadow=False,
                fontsize=9,
            )

        if analyte_name in log_scale_analytes:
            ax.set_yscale("log")

    for unused_ax in axes[n_plots:]:
        fig.delaxes(unused_ax)

    # Leave room on the right for the legends placed outside the axes, and
    # lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout(rect=(0, 0, 0.85, 1), h_pad=2.0)
    return fig
|
|
|
|
@st.cache_data
@timer(include_params=True)
def plot_parameter_correlations(
    df: pd.DataFrame,
    analyte_names: list[str],
    subset_by: str,
    subset: str,
    filter_by: str,
    threshold: float = 0.2,
) -> tuple[Figure, pd.DataFrame]:
    """
    Creates a correlation heatmap showing relationships between water quality parameters,
    with additional information about data completeness.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing water quality measurements. This function
        reads the columns:
        - Org_Analyte_Name: Name of the analyte
        - Org_Result_Value: Measurement value
        - Activity_Start_Date_Time: Timestamp of measurement
        - Reporting_Year: Year of measurement

    analyte_names : list[str]
        List of analyte names to include in correlation analysis

    subset_by : str
        Label of the subsetting dimension, shown in the title
        (e.g., "Sector", "Waterbody_Class"). No filtering is done here.

    subset : str
        Value of the subset, shown in the title (e.g., specific sector name)

    filter_by : str
        Sample position label shown in the title; "All" is displayed as
        "Surface and Bottom"

    threshold : float, default=0.2
        Minimum data completeness threshold (0-1). Completeness of a parameter
        is the fraction of pivoted timestamps that have a value for it.
        Parameters below the threshold are excluded from the correlation
        analysis but listed in the footnote.

    Returns
    -------
    tuple[Figure, pd.DataFrame]
        - Figure: Matplotlib figure containing:
            - Correlation heatmap with values
            - Title showing subset and sample size (number of rows in df)
            - Footnote listing excluded parameters
        - DataFrame: Pivot table (timestamps x parameters) restricted to the
          parameters used for the correlation analysis

    Notes
    -----
    - Uses abbreviated parameter names for cleaner display (e.g., "DO" for "Dissolved Oxygen")
    - Masks the upper triangle (including the diagonal) of the correlation matrix
    - Colors correlations using RdBu_r colormap centered at 0
    - Includes data completeness information in footnote
    - Caches results using streamlit cache decorator
    """
    name_mapping = {
        "Depth, Secchi Disk Depth": "Secchi Depth",
        "Dissolved Oxygen": "DO",
        "Fecal Coliform (MPN)": "Fecal Coliform",
        "Total Nitrogen": "TN",
        "Total Phosphorus": "TP",
    }

    # One row per timestamp, one column per requested analyte.
    pivot_df = (
        df[df["Org_Analyte_Name"].isin(analyte_names)]
        .pivot_table(
            index="Activity_Start_Date_Time",
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=False,
        )
        .rename(columns=name_mapping)
    )

    # Fraction of timestamps with a value, per parameter.
    completeness = pivot_df.notna().mean()
    valid_params = completeness[completeness >= threshold].index
    excluded_params = completeness[completeness < threshold]

    pivot_df = pivot_df[valid_params]
    corr = pivot_df.corr()

    # Sample size reported in the title is the number of input rows.
    n_samples = len(df)

    fig = plt.figure(figsize=(6, 7))

    # Three stacked panels: title, heatmap, footnote.
    gs = fig.add_gridspec(
        3,
        1,
        height_ratios=[
            1,
            4,
            1.5,
        ],
        hspace=0.4,
    )

    title_ax = fig.add_subplot(gs[0])
    heatmap_ax = fig.add_subplot(gs[1])
    footnote_ax = fig.add_subplot(gs[2])

    mask = np.triu(np.ones_like(corr, dtype=bool))
    heatmap = sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        cmap="RdBu_r",
        center=0,
        vmin=-1,
        vmax=1,
        ax=heatmap_ax,
        yticklabels=1,
        cbar=True,
        xticklabels=1,
    )

    heatmap_ax.set_xticklabels(
        heatmap_ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor"
    )
    heatmap_ax.tick_params(axis="x", pad=10)

    # The colorbar axes is the last one added to the figure; give its tick
    # labels a fixed width.
    colorbar = heatmap.figure.axes[-1]
    ticks = colorbar.get_yticks()
    colorbar.set_yticks(ticks)
    colorbar.set_yticklabels([f"{tick:>8.2f}" for tick in ticks])

    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    heatmap_ax.set_xlabel("")
    heatmap_ax.set_ylabel("")

    # The footnote and title panels are text-only: no frame, no ticks.
    footnote_ax.set_frame_on(False)
    footnote_ax.set_xticks([])
    footnote_ax.set_yticks([])

    if not excluded_params.empty:
        footnote_lines = [
            f"Excluded parameters (<{threshold:.0%} data completeness):"
        ]
        footnote_lines.extend(
            f"  - {param}: {completeness_val:.1%} complete"
            for param, completeness_val in excluded_params.items()
        )

        footnote_ax.text(
            0.01,
            0.40,
            "\n".join(footnote_lines),
            ha="left",
            va="center",
            fontsize=9,
            fontstyle="italic",
            transform=footnote_ax.transAxes,
        )

    title_ax.set_frame_on(False)
    title_ax.set_xticks([])
    title_ax.set_yticks([])

    display_filter = "Surface and Bottom" if filter_by == "All" else filter_by

    year_info = (
        f"Reporting Year {df['Reporting_Year'].iloc[0]}"
        if df["Reporting_Year"].nunique(dropna=False) == 1
        else "All Years"
    )

    # Title lines are positioned in figure coordinates.
    title_ax.text(
        0.45,
        0.8,
        f"{subset_by}: {subset}",
        ha="center",
        va="center",
        fontsize=12,
        fontweight="bold",
        transform=fig.transFigure,
    )
    title_ax.text(
        0.45,
        0.75,
        f"{display_filter}, {year_info} (n={n_samples:,})",
        ha="center",
        va="bottom",
        fontsize=10,
        fontstyle="italic",
        transform=fig.transFigure,
    )

    fig.canvas.draw()
    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.02, top=0.85, hspace=0.4)

    return fig, pivot_df
|
|
|
|
def plot_np_ratios(df: pd.DataFrame) -> Figure:
    """
    Plot nitrogen-to-phosphorus ratios over time and as a histogram.

    Total Nitrogen and Total Phosphorus are paired per
    (Activity_Start_Date_Time, Sector) and divided. The top panel is a
    scatter of the ratio over time colored by sector, the bottom panel the
    distribution of the ratio; both mark the value 16 as the Redfield ratio.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Reads the columns Org_Analyte_Name,
        Org_Result_Value, Activity_Start_Date_Time and Sector.

    Returns:
    --------
    Figure
        Matplotlib figure with the two panels
    """
    nutrients_df = (
        df[df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])]
        .pivot_table(
            index=["Activity_Start_Date_Time", "Sector"],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
    )

    # A phosphorus value of 0 makes the ratio +/-inf, which dropna() does
    # not remove; mask those to NaN so no infinite value reaches the plots.
    nutrients_df["N:P Ratio"] = (
        nutrients_df["Total Nitrogen"] / nutrients_df["Total Phosphorus"]
    ).replace([np.inf, -np.inf], np.nan)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

    # Ratio over time, colored by sector.
    sns.scatterplot(
        data=nutrients_df,
        x="Activity_Start_Date_Time",
        y="N:P Ratio",
        hue="Sector",
        ax=ax1,
        alpha=0.6,
    )
    ax1.axhline(y=16, color="r", linestyle="--", label="Redfield Ratio (16:1)")
    ax1.set_ylabel("N:P Ratio")
    ax1.set_xlabel("Date")
    ax1.set_title("N:P Ratio Over Time")

    # Legend outside the axes, to the right.
    ax1.legend(bbox_to_anchor=(1.05, 1), loc="upper left")

    # Distribution of the ratio.
    sns.histplot(x=nutrients_df["N:P Ratio"].dropna(), ax=ax2)
    ax2.axvline(x=16, color="r", linestyle="--", label="Redfield Ratio (16:1)")
    ax2.set_xlabel("N:P Ratio")
    ax2.set_title("Distribution of N:P Ratios")
    ax2.legend()

    # Reserve the right margin for the outside legend, and lay out this
    # figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout(rect=(0, 0, 0.9, 1))
    return fig
|
|
|
|
def altair_plot_np_ratios(df: pd.DataFrame) -> alt.VConcatChart:
    """
    Build the Altair version of the N:P ratio charts.

    Total Nitrogen and Total Phosphorus are paired per
    (Activity_Start_Date_Time, Sector) and divided. The result is a scatter
    of the ratio over time stacked above a histogram of the ratio, each
    with a red rule at 16.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Reads the columns Org_Analyte_Name,
        Org_Result_Value, Activity_Start_Date_Time and Sector.

    Returns:
    --------
    alt.VConcatChart
        The two layered charts concatenated vertically
    """
    # The colon in the column name has to be escaped in Altair shorthand.
    ratio_field = r"N\:P Ratio:Q"

    is_nutrient = df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])
    wide = df[is_nutrient].pivot_table(
        index=["Activity_Start_Date_Time", "Sector"],
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=True,
    )
    nutrients_df = wide.reset_index()
    nutrients_df["N:P Ratio"] = (
        nutrients_df["Total Nitrogen"] / nutrients_df["Total Phosphorus"]
    )

    # --- Top panel: ratio over time ---
    date_axis = alt.X(
        "Activity_Start_Date_Time:T",
        axis=alt.Axis(format="%Y", tickCount="year"),
        title="Date",
    )
    scatter_tooltips = [
        alt.Tooltip("Activity_Start_Date_Time:T", title="Date"),
        alt.Tooltip(ratio_field, format=".0f", title="N:P Ratio"),
        alt.Tooltip("Sector:N", title="Sector"),
    ]
    time_series = (
        alt.Chart(nutrients_df)
        .mark_circle(size=60)
        .encode(
            x=date_axis,
            y=alt.Y(ratio_field, title="N:P Ratio"),
            color="Sector:N",
            tooltip=scatter_tooltips,
        )
        .properties(title="N:P Ratio Over Time", width=600, height=300)
        .interactive()
    )
    horizontal_rule = (
        alt.Chart(pd.DataFrame({"y": [16]})).mark_rule(color="red").encode(y="y:Q")
    )

    # --- Bottom panel: distribution of the ratio ---
    histogram = (
        alt.Chart(nutrients_df)
        .mark_bar()
        .encode(
            x=alt.X(ratio_field, bin=alt.Bin(maxbins=30), title="N:P Ratio"),
            y="count()",
            tooltip=["count()"],
        )
        .properties(title="Distribution of N:P Ratios", width=600, height=300)
        .interactive()
    )
    vertical_rule = (
        alt.Chart(pd.DataFrame({"x": [16]})).mark_rule(color="red").encode(x="x:Q")
    )

    # Each panel keeps its own y scale.
    top_panel = time_series + horizontal_rule
    bottom_panel = histogram + vertical_rule
    return alt.vconcat(top_panel, bottom_panel).resolve_scale(y="independent")
|
|
|
|
def plot_calendar_heatmap(
    df: pd.DataFrame,
    analyte: str,
    colormap: str | None = None,
    position_filter: str = "All",
) -> Figure:
    """
    Draw a year-by-month heatmap of the mean value of one analyte.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Reads the columns Org_Analyte_Name,
        Org_Result_Value, Org_Result_Unit and Activity_Start_Date_Time.
    analyte : str
        Analyte name to plot
    colormap : str | None
        Colormap name overriding the per-analyte default when given
    position_filter : str
        Sample position label used in the title and error message only
        (no filtering is done here); "All" is displayed as "Surface and Bottom"

    Returns:
    --------
    Figure
        Matplotlib figure containing the heatmap

    Raises:
    -------
    ValueError
        If df holds no rows for the analyte
    """
    # Default colormap per analyte; anything not listed uses "Blues".
    default_cmaps = {
        "Fecal Coliform (MPN)": "viridis",
        "Temperature, Water": "coolwarm",
        "Dissolved Oxygen": "RdYlBu",
        "Total Nitrogen": "GnBu",
        "Total Phosphorus": "GnBu",
        "Depth, Secchi Disk Depth": "Blues_r",
    }

    data = df[df["Org_Analyte_Name"] == analyte]
    if data.empty:
        raise ValueError(
            f"No data available for {analyte} with position filter: {position_filter}"
        )

    # The unit of the first row labels the colorbar.
    result_unit = data["Org_Result_Unit"].iloc[0]
    timestamps = data["Activity_Start_Date_Time"]
    data = data.assign(Year=timestamps.dt.year, Month=timestamps.dt.month)

    # Rows are years, columns are months, cells are monthly means.
    pivot_data = data.pivot_table(
        values="Org_Result_Value", index="Year", columns="Month", aggfunc="mean"
    )

    cmap = colormap or default_cmaps.get(analyte, "Blues")

    # Half an inch of height per year row.
    fig, ax = plt.subplots(figsize=(6, len(pivot_data) * 0.5))

    # Draw on the axes created above instead of pyplot's "current" axes.
    sns.heatmap(
        pivot_data,
        cmap=cmap,
        annot=True,
        fmt=".2f",
        cbar_kws={"label": result_unit},
        annot_kws={"size": 6},
        ax=ax,
    )

    position_label = (
        "Surface and Bottom" if position_filter == "All" else position_filter
    )
    ax.set_title(
        f"Monthly Averages: {analyte} ({position_label.lower()})", fontsize=10, pad=10
    )
    ax.tick_params(axis="both", which="major", labelsize=7)
    ax.set_xlabel("Month", fontsize=6)
    ax.set_ylabel("Year", fontsize=6)

    # Shrink the colorbar tick labels and label to match the small figure.
    colorbar = ax.collections[0].colorbar
    colorbar.ax.tick_params(labelsize=7)
    colorbar.set_label(result_unit, size=7)

    return fig
|
|
|
|
def plot_seasonal_salinity(
    salinity_data: pd.DataFrame,
    year: str,
    basemap_provider,
    alpha=0.5,
    shapefile_path="data/SAB/SAB.shp",
    reporting_end_month: int = 10,
):
    """
    Create seasonal plots of mean salinity values by WBID with basemap.
    Uses a configurable reporting year split into four three-month quarters,
    the first quarter starting the month after reporting_end_month.

    Args:
        salinity_data: DataFrame containing salinity measurements. Reads the
            columns WBID, Reporting_Year, Activity_Start_Date_Time and Salinity.
        year: Reporting Year to filter data for (str); must be parseable by int()
        basemap_provider: Tile source passed to contextily's add_basemap
        alpha: Opacity of the basemap tiles
        shapefile_path: Path of the WBID polygon file; must have a WBID column
        reporting_end_month: Last month of the reporting year (1-12, default=10 for October)

    Returns:
        Matplotlib figure with a 2x2 grid of quarterly maps and one shared colorbar.

    Raises:
        ValueError: If year cannot be converted to an integer.
    """
    # Parse the year before any file I/O so a bad value fails immediately.
    reporting_year = int(year)

    # Keep only polygons that have salinity data, projected to Web Mercator
    # (EPSG:3857).
    wbids = gpd.read_file(shapefile_path)
    wbids = wbids[wbids["WBID"].isin(salinity_data["WBID"].unique())]
    wbids = wbids.to_crs(epsg=3857)

    year_data = salinity_data[
        salinity_data["Reporting_Year"] == reporting_year
    ].copy()

    def get_quarter(date) -> str:
        """Return "Q1".."Q4" for a date within the reporting year."""
        # Shift months so the month after reporting_end_month becomes month 1.
        month_offset = (12 - reporting_end_month) % 12
        adjusted_month = ((date.month + month_offset) % 12) or 12
        return f"Q{((adjusted_month - 1) // 3) + 1}"

    year_data["quarter"] = year_data["Activity_Start_Date_Time"].apply(get_quarter)

    seasonal_means = (
        year_data.groupby(["WBID", "quarter"], observed=True)["Salinity"]
        .mean()
        .reset_index()
    )

    fig = plt.figure(figsize=(20, 14))

    # Blue (low) to red (high) colormap.
    colors = ["#08519c", "#73a9cf", "#fee090", "#fc8d59", "#d73027"]
    cmap = LinearSegmentedColormap.from_list("custom", colors, N=100)

    # Color range: observed minimum up to a fixed 40. With no data for the
    # year the minimum is NaN, which is not a usable bound, so fall back to 0.
    vmin = seasonal_means["Salinity"].min()
    if pd.isna(vmin):
        vmin = 0.0
    vmax = 40

    # Map extent: polygon bounds plus a 5% margin on every side.
    bounds = wbids.total_bounds
    x_buffer = (bounds[2] - bounds[0]) * 0.05
    y_buffer = (bounds[3] - bounds[1]) * 0.05
    extent = [
        bounds[0] - x_buffer,
        bounds[2] + x_buffer,
        bounds[1] - y_buffer,
        bounds[3] + y_buffer,
    ]

    gs = fig.add_gridspec(
        2,
        2,
        width_ratios=[1, 1],
        wspace=0.05,
        hspace=-0.15,
        left=0.02,
        right=0.98,
        top=0.95,
        bottom=0.05,
    )

    def get_quarter_dates(quarter: str) -> str:
        """Return the "Mon DD, YYYY - Mon DD, YYYY" span of a quarter label."""
        first_month = (reporting_end_month % 12) + 1

        quarter_num = int(quarter[1])
        start_month = ((first_month - 1 + ((quarter_num - 1) * 3)) % 12) + 1
        end_month = ((start_month + 2) % 12) or 12

        # Months after reporting_end_month belong to the previous calendar
        # year; a quarter may also wrap across the calendar-year boundary.
        start_year = (
            reporting_year - 1 if start_month > reporting_end_month else reporting_year
        )
        end_year = start_year + 1 if end_month < start_month else start_year

        start_date = pd.Timestamp(year=start_year, month=start_month, day=1)
        end_first = pd.Timestamp(year=end_year, month=end_month, day=1)
        end_date = end_first.replace(day=end_first.days_in_month)

        return f"{start_date.strftime('%b %d, %Y')} - {end_date.strftime('%b %d, %Y')}"

    for idx, quarter in enumerate(["Q1", "Q2", "Q3", "Q4"]):
        ax = fig.add_subplot(gs[idx // 2, idx % 2])

        # Left merge keeps polygons without data; they are drawn grey.
        quarter_data = seasonal_means[seasonal_means["quarter"] == quarter]
        merged = wbids.merge(quarter_data, on="WBID", how="left")

        merged.plot(
            column="Salinity",
            ax=ax,
            cmap=cmap,
            vmin=vmin,
            vmax=vmax,
            alpha=0.7,
            missing_kwds={"color": "lightgrey", "alpha": 0.5},
        )

        # Set the buffered limits BEFORE adding the basemap: add_basemap
        # fetches tiles for the axes extent at the time it is called.
        ax.set_xlim(extent[0], extent[1])
        ax.set_ylim(extent[2], extent[3])

        ctx.add_basemap(ax, source=basemap_provider, zoom=11, alpha=alpha)

        date_range = get_quarter_dates(quarter)

        # Top row gets more title padding than the bottom row.
        ax.set_title(
            f"Quarter {quarter[1]} Mean Salinity\n{date_range}",
            pad=15 if idx < 2 else 5,
            fontsize=10,
        )
        ax.set_axis_off()

    # One shared colorbar for all four maps.
    norm = plt.Normalize(vmin=vmin, vmax=vmax)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(
        sm,
        ax=fig.axes,
        orientation="vertical",
        label="Salinity (ppt)",
        pad=0.01,
        fraction=0.015,
        ticks=np.arange(0, 45, 5),
    )

    return fig
|
|
|
|
def plot_seasonal_salinity_for_bays(
    salinity_data: pd.DataFrame,
    year: str,
    basemap_provider=ctx.providers.USGS.USTopo,
    alpha=0.5,
    shapefile_path="data/SAB/SAB.shp",
    wbids=None,
    reporting_end_month: int = 10,
):
    """
    Create seasonal plots of mean salinity values by WBID for N, E, W, SAB, GL and Lake Powell.

    Restricts salinity_data to a fixed list of WBIDs and delegates to
    plot_seasonal_salinity, which reads the polygons from shapefile_path.

    Args:
        salinity_data: DataFrame containing salinity measurements; must have a WBID column
        year: Reporting Year to filter data for (str)
        basemap_provider: Tile source forwarded to plot_seasonal_salinity
        alpha: Basemap opacity forwarded to plot_seasonal_salinity
        shapefile_path: Polygon file forwarded to plot_seasonal_salinity
        wbids: Accepted for backward compatibility but unused.
            plot_seasonal_salinity takes no geometry argument and always
            reads shapefile_path itself, so loading or reprojecting
            geometries here had no effect on the plot.
        reporting_end_month: Last month of the reporting year (1-12)

    Returns:
        The figure returned by plot_seasonal_salinity.
    """
    # NOTE(review): the previous body read the shapefile, assigned EPSG:6439
    # when no CRS was set, and reprojected, but the result was never passed
    # on. If the source file really lacks a CRS, plot_seasonal_salinity needs
    # to handle that itself - confirm.
    bay_wbids = [
        "1061A",
        "1061B",
        "1061C",
        "1061D",
        "1061E",
        "1061F",
        "1061G",
        "1061H",
        "1055A",
    ]
    return plot_seasonal_salinity(
        salinity_data[salinity_data["WBID"].isin(bay_wbids)],
        year=year,
        basemap_provider=basemap_provider,
        alpha=alpha,
        shapefile_path=shapefile_path,
        reporting_end_month=reporting_end_month,
    )
|
|
|
|
def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
    """
    Scatter dissolved oxygen against water temperature with seaborn
    regression lines, one per sample position (Surface / Bottom).

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. Reads
        the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time, Station_Number and Sample_Position.

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot
    """
    temp_col = "Temperature, Water"
    do_col = "Dissolved Oxygen"

    # Pair DO and temperature taken at the same time, station and position;
    # keep only rows where both are present.
    selected = df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
    paired = selected.pivot_table(
        index=["Activity_Start_Date_Time", "Station_Number", "Sample_Position"],
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=True,
    )
    do_temp_data = paired.reset_index().dropna(subset=[do_col, temp_col])

    position_colors = {"Surface": "#5AA4D8", "Bottom": "#1B4B8A"}

    grid = sns.lmplot(
        data=do_temp_data,
        x=temp_col,
        y=do_col,
        hue="Sample_Position",
        hue_order=["Bottom", "Surface"],
        palette=position_colors,
        scatter_kws={"alpha": 0.5, "zorder": 2, "s": 20},
        line_kws={"zorder": 3, "linewidth": 1},
        height=8,
        aspect=1.5,
        legend=False,
    )
    ax = grid.axes[0, 0]

    # Dashed reference line at 4.8 with its label at the left edge.
    ax.axhline(
        y=4.8, color="#FF8C00", linestyle="--", alpha=0.9, zorder=1, linewidth=1
    )
    left_edge = ax.get_xlim()[0]
    ax.text(
        left_edge,
        4.9,
        " 4.8 mg/L DO threshold",
        ha="left",
        va="bottom",
        color="#FF8C00",
        alpha=0.9,
    )

    # Keep only a thin black bottom spine.
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)
    bottom_spine = ax.spines["bottom"]
    bottom_spine.set_color("black")
    bottom_spine.set_linewidth(0.5)

    grid.set_axis_labels("Water Temperature (°C)", "Dissolved Oxygen (mg/L)")
    ax.set_title("Dissolved Oxygen vs Water Temperature", pad=20, fontsize=16)

    # Legend entries in the reverse of the drawing order.
    legend_handles, legend_labels = ax.get_legend_handles_labels()
    ax.legend(
        legend_handles[::-1],
        legend_labels[::-1],
        bbox_to_anchor=(1.0, 1.0),
        loc="upper right",
        frameon=False,
        handletextpad=0.5,
    )

    # Faint horizontal grid lines, no y tick marks.
    ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")
    ax.tick_params(axis="y", which="both", length=0)

    # y range: from just below the smaller of the minimum DO and 4.8
    # (never below 0) to 10% above the maximum DO, ticks every 2 units.
    do_values = do_temp_data[do_col]
    lowest = min(do_values.min(), 4.8)
    ymin = max(int(lowest * 0.9) - 1, 0)
    ymax = do_values.max() * 1.1
    ax.set_ylim(ymin, ymax)
    ax.set_yticks(np.arange(ymin, ymax, 2))

    return grid.figure
|
|
|
|
def plotly_plot_do_temp_relationship(df: pd.DataFrame) -> go.Figure:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Plotly.
    Matches the style and features of the original matplotlib/seaborn plot.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The columns
        read here are "Org_Analyte_Name", "Org_Result_Value",
        "Activity_Start_Date_Time", "Station_Number", "Sample_Position" and
        "Sector".

    Returns:
    --------
    go.Figure
        Plotly figure object. A sample position without paired DO/temperature
        rows is left out of the figure, and a trend line is only drawn when the
        position has at least two distinct temperatures.
    """
    # Pair DO and temperature readings taken at the same time/station/position;
    # rows missing either analyte cannot be plotted and are dropped.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sample_Position",
                "Sector",
            ],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
    )

    fig = go.Figure()

    colors = {"Surface": "#8da0cb", "Bottom": "#fc8d62"}

    for position in ["Surface", "Bottom"]:
        pos_data = do_temp_data[do_temp_data["Sample_Position"] == position]

        # np.polyfit raises on empty input, so skip positions with no data.
        if pos_data.empty:
            continue

        temperature = pos_data["Temperature, Water"]
        dissolved_oxygen = pos_data["Dissolved Oxygen"]

        fig.add_trace(
            go.Scatter(
                x=temperature,
                y=dissolved_oxygen,
                mode="markers",
                name=position,
                marker=dict(color=colors[position], size=8, opacity=0.6),
                hovertemplate=(
                    "Temperature: %{x:.1f}°C<br>"
                    "DO: %{y:.1f} mg/L<br>"
                    "Position: " + position + "<br>"
                    "Station: %{customdata[0]}<br>"
                    "Sector: %{customdata[1]}<br>"
                    "<extra></extra>"
                ),
                customdata=pos_data[["Station_Number", "Sector"]],
            )
        )

        # A straight-line fit needs at least two distinct x values; otherwise
        # the slope is undefined and the fit is meaningless.
        if temperature.nunique() < 2:
            continue

        trend = np.poly1d(np.polyfit(temperature, dissolved_oxygen, 1))
        x_range = np.linspace(temperature.min(), temperature.max(), 100)

        fig.add_trace(
            go.Scatter(
                x=x_range,
                y=trend(x_range),
                mode="lines",
                line=dict(color=colors[position], dash="dash"),
                name=f"{position} Trend",
                hovertemplate=None,
                hoverinfo="skip",
                showlegend=False,
            )
        )

    # Horizontal reference line for the DO threshold.
    fig.add_hline(
        y=4.8,
        line=dict(color="#FF8C00", width=1, dash="dash"),
        opacity=0.5,
        annotation_text="4.8 mg/L DO threshold",
        annotation_position="left",
        annotation=dict(
            font=dict(color="#FF8C00", size=12),
            xanchor="left",
            yanchor="bottom",
            opacity=0.8,
        ),
    )

    fig.update_layout(
        title=dict(
            text="Dissolved Oxygen vs Water Temperature",
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
            font=dict(size=16),
        ),
        xaxis_title="Water Temperature (°C)",
        yaxis_title="Dissolved Oxygen (mg/L)",
        legend_title="Sample Position",
        legend=dict(
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.05,
        ),
        template="plotly_white",
        width=800,
        height=600,
        showlegend=True,
    )

    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")

    return fig
|
|
|
|
def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
    Matches the style and features of the original matplotlib/seaborn plot.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The columns
        read here are "Org_Analyte_Name", "Org_Result_Value",
        "Activity_Start_Date_Time", "Station_Number", "Sample_Position" and
        "Sector".

    Returns:
    --------
    alt.LayerChart
        Layered Altair chart (scatter, per-position regression lines, threshold
        rule and threshold label)
    """
    # Same 4.8 mg/L DO threshold that the matplotlib and Plotly versions of
    # this chart draw; the label sits slightly above the rule.
    do_threshold = 4.8
    label_offset = 0.1

    # Pair DO and temperature readings taken at the same time/station/position;
    # rows missing either analyte cannot be plotted and are dropped.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sample_Position",
                "Sector",
            ],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
    )

    # Shared by the points and the regression lines so their colours agree.
    position_colors = alt.Scale(
        domain=["Surface", "Bottom"],
        range=["#8da0cb", "#fc8d62"],
    )

    scatter = (
        alt.Chart(do_temp_data)
        .mark_circle(size=60, opacity=0.6)
        .encode(
            x=alt.X(
                "Temperature, Water:Q",
                title="Water Temperature (°C)",
                scale=alt.Scale(zero=False),
            ),
            y=alt.Y(
                "Dissolved Oxygen:Q",
                title="Dissolved Oxygen (mg/L)",
                scale=alt.Scale(zero=False),
            ),
            color=alt.Color(
                "Sample_Position:N",
                scale=position_colors,
                legend=alt.Legend(title="Sample Position"),
            ),
            tooltip=[
                alt.Tooltip("Temperature, Water:Q", title="Temperature", format=".1f"),
                alt.Tooltip("Dissolved Oxygen:Q", title="DO", format=".1f"),
                alt.Tooltip("Sample_Position:N", title="Position"),
                alt.Tooltip("Sector:N", title="Sector"),
                alt.Tooltip("Station_Number:N", title="Station"),
            ],
        )
    )

    # One linear regression line per sample position.
    regression = (
        scatter.transform_regression(
            "Temperature, Water", "Dissolved Oxygen", groupby=["Sample_Position"]
        )
        .mark_line(size=2)
        .encode(color=alt.Color("Sample_Position:N", scale=position_colors))
    )

    threshold_line = (
        alt.Chart(pd.DataFrame({"y": [do_threshold]}))
        .mark_rule(strokeDash=[4, 4], color="red", opacity=0.5)
        .encode(y="y:Q")
    )

    # Anchor the label at the left edge of the plotted temperatures.
    threshold_label = (
        alt.Chart(
            pd.DataFrame(
                {
                    "x": [do_temp_data["Temperature, Water"].min()],
                    "y": [do_threshold + label_offset],
                }
            )
        )
        .mark_text(
            align="left",
            baseline="bottom",
            color="red",
            opacity=0.5,
            text=f" {do_threshold} mg/L DO threshold",
        )
        .encode(x="x:Q", y="y:Q")
    )

    final_chart = (
        alt.layer(scatter, regression, threshold_line, threshold_label)
        .properties(
            width=800,
            height=750,
        )
        .configure_axis(grid=True, gridOpacity=0.3)
        .interactive()
    )

    return final_chart
|
|
|
|
@timer(include_params=True)
def generate_seasonal_plot(data, year, shapefile_path):
    """
    Generate the seasonal trends plot.

    Reads the shapefile at ``shapefile_path``, assigns a CRS to the inputs when
    none is set (EPSG:4326 for a GeoDataFrame ``data``, EPSG:6439 for the
    shapefile), reprojects the shapefile to EPSG:3857 and delegates the drawing
    to ``plot_seasonal_salinity_for_bays``. When the ``DEBUG`` session flag is
    set, CRS/version/file diagnostics are written to the Streamlit page first.

    Note that ``data`` is modified in place when it is a GeoDataFrame without
    a CRS.
    """
    wbids = gpd.read_file(shapefile_path)

    data_is_geo = isinstance(data, gpd.GeoDataFrame)
    if data_is_geo and data.crs is None:
        data.set_crs(epsg=4326, inplace=True)

    if wbids.crs is None:
        wbids.set_crs(epsg=6439, inplace=True)

    # Reproject the shapefile to EPSG:3857 before handing it to the plotter.
    wbids = wbids.to_crs(epsg=3857)

    debug_enabled = st.session_state.get("DEBUG", False)
    if debug_enabled:
        st.write("Debug Info:")
        shapefile = Path(shapefile_path)
        debug_info = {
            "Shapefile CRS": wbids.crs,
            "Input Data CRS": data.crs if data_is_geo else "Not a GeoDataFrame",
            "GDAL Version": (
                gdal.VersionInfo() if "osgeo.gdal" in sys.modules else "Not available"
            ),
            "GeoPandas Version": gpd.__version__,
            "Python Version": sys.version,
            "File exists": shapefile.exists(),
            "Associated files": list(shapefile.parent.glob("*.*")),
        }
        st.write(debug_info)

    return plot_seasonal_salinity_for_bays(
        data,
        year,
        shapefile_path=shapefile_path,
        wbids=wbids,
        reporting_end_month=st.session_state.reporting_month,
    )
|
|
|
|
def plot_do_timeseries(
    df: pd.DataFrame,
    period: str = "Yearly",
    sector: str = "All",
    epa_thresh: float = 4.8,
) -> Figure:
    """
    Create a time series plot of dissolved oxygen levels for surface and bottom measurements.

    Reference:
    https://www.hudsonriver.org/ccmp/soe/water-quality/do

    Parameters:
    -----------
    df : pd.DataFrame
        Filtered dataframe containing dissolved oxygen measurements
    period : str
        'yearly' or 'monthly' aggregation period (case-insensitive). Any value
        other than 'yearly' is treated as monthly.
    sector : str
        Not used by this function; the title and data are the same for every
        value.
    epa_thresh : float
        EPA threshold value for DO in mg/L

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot

    Raises:
    -------
    ValueError
        If `df` holds no Surface/Bottom dissolved oxygen rows.
    """
    is_yearly = period.lower() == "yearly"

    do_data = df[
        (df["Org_Analyte_Name"] == "Dissolved Oxygen")
        & (df["Sample_Position"].isin(["Surface", "Bottom"]))
    ].copy()

    # Fail before a figure is created so nothing is leaked into pyplot state.
    if do_data.empty:
        raise ValueError(
            "No dissolved oxygen data found for Surface or Bottom positions"
        )

    if is_yearly:
        do_data["Period"] = do_data["Reporting_Year"]
    else:
        do_data["Period"] = pd.to_datetime(
            do_data["Activity_Start_Date_Time"]
        ).dt.to_period("M")

    # Mean DO per period and position, one column per position. The reindex
    # guarantees both columns exist (all-NaN) even if one position has no data.
    means = (
        do_data.groupby(["Period", "Sample_Position"], observed=True)[
            "Org_Result_Value"
        ]
        .mean()
        .reset_index()
        .pivot(index="Period", columns="Sample_Position", values="Org_Result_Value")
        .reindex(columns=["Surface", "Bottom"])
    )

    fig, ax = plt.subplots(figsize=(15, 8))

    if is_yearly:
        x_values = np.array(means.index.astype(float))
    else:
        x_values = np.array(
            [pd.Period(idx).to_timestamp() for idx in means.index],
            dtype="datetime64[ns]",
        )

    # Light connector between the bottom and surface means of each period.
    for x_val, bottom_mean, surface_mean in zip(
        x_values, means["Bottom"], means["Surface"]
    ):
        ax.plot(
            [x_val, x_val],
            [bottom_mean, surface_mean],
            color="lightgray",
            linewidth=1,
            zorder=1,
            solid_capstyle="round",
        )

    # Shrink the markers as the number of periods grows, down to a floor.
    n_points = len(x_values)
    base_size = 80
    min_size = 20
    point_size = max(
        min_size,
        base_size * math.exp(-0.0015 * n_points),
    )

    surface_scatter = ax.scatter(
        x_values,
        means["Surface"],
        color="#5AA4D8",
        s=point_size,
        zorder=2,
        label="Surface",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )
    bottom_scatter = ax.scatter(
        x_values,
        means["Bottom"],
        color="#1B4B8A",
        s=point_size,
        zorder=2,
        label="Bottom",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )

    threshold_line = ax.axhline(
        y=epa_thresh,
        color="#FF8C00",
        linestyle="--",
        alpha=0.9,
        linewidth=1,
        label=f"EPA threshold: {epa_thresh} mg/L",
        zorder=0,
    )

    ax.legend(
        handles=[surface_scatter, bottom_scatter, threshold_line],
        loc="upper right",
        frameon=False,
        ncol=1,
        bbox_to_anchor=(1.0, 1.0),
        handletextpad=0.5,
    )

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("black")
    ax.spines["bottom"].set_linewidth(0.5)

    ax.set_xlabel("Year" if is_yearly else "Month")
    ax.set_ylabel("Dissolved Oxygen (mg/L)")
    ax.set_title("Long-term Dissolved Oxygen Trends")
    ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")

    # Derive the y-range from every plotted mean (either position can hold the
    # extreme value) and keep the threshold line inside the view.
    plotted_means = means[["Surface", "Bottom"]].to_numpy(dtype=float)
    finite_means = plotted_means[np.isfinite(plotted_means)]
    if finite_means.size:
        lowest = min(finite_means.min(), epa_thresh)
        highest = max(finite_means.max(), epa_thresh)
    else:
        lowest = highest = epa_thresh
    ymin = max(int(lowest * 0.9) - 1, 0)
    ymax = highest * 1.1
    ax.set_ylim(ymin, ymax)
    ax.set_yticks(np.arange(ymin, ymax, 2))

    ax.tick_params(axis="y", which="both", length=0)

    if is_yearly:
        # One tick per year, with a small margin on both sides.
        min_year = float(np.floor(x_values.min()))
        max_year = float(np.ceil(x_values.max()))
        ax.set_xticks(np.arange(min_year, max_year + 1))
        ax.set_xlim(min_year - 0.083, max_year + 0.083)
    else:
        # Monthly points, labelled once per year, padded by a month each side.
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.tick_params(axis="x", labelrotation=0)
        first_month = pd.Timestamp(x_values.min()) - pd.DateOffset(months=1)
        last_month = pd.Timestamp(x_values.max()) + pd.DateOffset(months=1)
        ax.set_xlim(first_month, last_month)

    ax.yaxis.tick_left()
    ax.yaxis.set_label_position("left")

    fig.tight_layout()
    return fig
|
|
|
|
def plot_do_scatter(
    df: pd.DataFrame,
    sector: str = "All",
    thresh: float = 3.0,
) -> Figure:
    """
    Create a scatter plot of all dissolved oxygen measurements.

    Parameters:
    -----------
    df : pd.DataFrame
        Filtered dataframe containing dissolved oxygen measurements
    sector : str
        Sector name appended to the title, or 'All' for no suffix. The data is
        not filtered by this value here.
    thresh : float
        Threshold value for DO in mg/L

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot

    Raises:
    -------
    ValueError
        If `df` holds no Surface/Bottom dissolved oxygen rows.
    """
    do_data = df[
        (df["Org_Analyte_Name"] == "Dissolved Oxygen")
        & (df["Sample_Position"].isin(["Surface", "Bottom"]))
    ].copy()

    # Fail before a figure is created so nothing is leaked into pyplot state.
    if do_data.empty:
        raise ValueError(
            "No dissolved oxygen data found for Surface or Bottom positions"
        )

    fig, ax = plt.subplots(figsize=(15, 8))

    surface_data = do_data[do_data["Sample_Position"] == "Surface"]
    bottom_data = do_data[do_data["Sample_Position"] == "Bottom"]

    ax.scatter(
        surface_data["Activity_Start_Date_Time"],
        surface_data["Org_Result_Value"],
        color="#1f77b4",
        s=25,
        alpha=0.5,
        label="Surface",
        zorder=2,
    )
    ax.scatter(
        bottom_data["Activity_Start_Date_Time"],
        bottom_data["Org_Result_Value"],
        color="#7fbf7b",
        s=25,
        alpha=0.5,
        label="Bottom",
        zorder=2,
    )

    # Fix the y-range before placing the hurricane marker: the marker height
    # is derived from the axis limits, so they must be final at that point.
    ymin = max(int(min(do_data["Org_Result_Value"].min(), thresh) * 0.9) - 1, 0)
    ymax = do_data["Org_Result_Value"].max() * 1.1
    ax.set_ylim(ymin, ymax)
    ax.set_yticks(np.arange(ymin, ymax, 2))

    # Mark Hurricane Michael only when it falls inside the plotted date range.
    hurricane_date = pd.Timestamp("2018-10-10")
    data_start = do_data["Activity_Start_Date_Time"].min()
    data_end = do_data["Activity_Start_Date_Time"].max()

    if data_start <= hurricane_date <= data_end:
        # axvline's ymin/ymax are fractions of the axes height, whereas the dot
        # and labels use data coordinates; convert the same fraction to a data
        # value so the dot sits exactly on the end of the line.
        marker_fraction = 0.95
        axis_bottom, axis_top = ax.get_ylim()
        line_height = axis_bottom + marker_fraction * (axis_top - axis_bottom)

        ax.axvline(
            x=hurricane_date,
            color="gray",
            linestyle="-",
            alpha=0.6,
            linewidth=1,
            ymin=0,
            ymax=marker_fraction,
            zorder=1,
        )

        ax.scatter(
            [hurricane_date],
            [line_height],
            color="gray",
            s=25,
            alpha=0.8,
            zorder=2,
        )

        ax.annotate(
            "Oct 2018",
            xy=(hurricane_date, line_height),
            xytext=(5, 0),
            textcoords="offset points",
            ha="left",
            va="bottom",
            color="gray",
            fontsize=10,
            weight="bold",
        )

        ax.annotate(
            "Hurricane Michael",
            xy=(hurricane_date, line_height),
            xytext=(5, -12),
            textcoords="offset points",
            ha="left",
            va="bottom",
            color="gray",
            fontsize=10,
        )

    ax.axhline(
        y=thresh,
        color="red",
        linestyle=":",
        alpha=0.9,
        linewidth=1.5,
        label=f"Threshold: {thresh} mg/L",
        zorder=1,
    )

    ax.legend(
        loc="upper right",
        frameon=True,
        ncol=1,
        bbox_to_anchor=(1.0, 1.0),
        handletextpad=0.5,
        fontsize=12,
    )

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("black")
    ax.spines["bottom"].set_linewidth(0.5)

    title = "DO mg/L"
    if sector != "All":
        title += f" - {sector}"
    ax.set_title(title, fontsize=14)

    ax.grid(True, axis="both", alpha=0.15, linestyle="-", color="gray")

    ax.tick_params(axis="y", which="both", length=0)

    # One labelled tick per calendar year on the time axis.
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    fig.tight_layout()
    return fig
|
|
|
|
def plot_scatter(
    df: pd.DataFrame,
    parameter: str,
    sector: str = "All",
    thresh: float | None = None,
) -> tuple[Figure, pd.DataFrame]:
    """
    Create a scatter plot of water quality measurements for any parameter.

    Parameters:
    -----------
    df : pd.DataFrame
        Filtered dataframe containing water quality measurements
    parameter : str
        Name of the parameter to plot (e.g., "Dissolved Oxygen", "Temperature, Water")
    sector : str
        Sector name appended to the title, or 'All' for no suffix. The data is
        not filtered by this value here.
    thresh : float | None
        Optional threshold value to display on plot

    Returns:
    --------
    tuple[Figure, pd.DataFrame]
        - Figure: Matplotlib figure containing the scatter plot
        - DataFrame: Filtered dataframe containing the parameter data used in the plot

    Raises:
    -------
    ValueError
        If `df` holds no Surface/Bottom rows for `parameter`.
    """
    param_data = df[
        (df["Org_Analyte_Name"] == parameter)
        & (df["Sample_Position"].isin(["Surface", "Bottom"]))
    ].copy()

    if param_data.empty:
        raise ValueError(f"No data found for parameter: {parameter}")

    # The unit of the first row is used for the axis label and legend.
    unit = param_data["Org_Result_Unit"].iloc[0]

    fig, ax = plt.subplots(figsize=(15, 8))

    surface_data = param_data[param_data["Sample_Position"] == "Surface"]
    bottom_data = param_data[param_data["Sample_Position"] == "Bottom"]

    # Parameters drawn on a logarithmic y-axis.
    log_scale_parameters = [
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
        "Color",
    ]
    log_scale = parameter in log_scale_parameters

    values = param_data["Org_Result_Value"]

    if log_scale:
        ax.set_yscale("log")

        # Keep the lower bound strictly positive for the log axis.
        ymin = max(values.min() * 0.5, 0.1)
        ymax = values.max() * 2

        # Widen the range so the threshold line is visible on either side of
        # the data. A non-positive threshold cannot be placed on a log axis.
        if thresh is not None and thresh > 0:
            ymin = min(ymin, thresh * 0.5)
            ymax = max(ymax, thresh * 2)

        ax.set_ylim(ymin, ymax)

        # One plainly formatted tick per decade covering the range.
        log_ymin = np.floor(np.log10(ymin))
        log_ymax = np.ceil(np.log10(ymax))
        yticks = np.logspace(log_ymin, log_ymax, int(log_ymax - log_ymin) + 1)
        ax.set_yticks(yticks)
        ax.yaxis.set_major_formatter(plt.ScalarFormatter())
        ax.yaxis.set_minor_formatter(plt.NullFormatter())
    else:
        ymin = values.min() * 0.9
        ymax = values.max() * 1.1
        if thresh is not None:
            ymin = min(ymin, thresh * 0.9)
            ymax = max(ymax, thresh * 1.1)
        ax.set_ylim(ymin, ymax)

        tick_range = ymax - ymin
        if tick_range > 40:
            # A fixed 2-unit spacing would create an unbounded number of ticks
            # for wide ranges; leave placement to Matplotlib's default locator.
            pass
        else:
            if tick_range > 10:
                tick_spacing = 2.0
            elif tick_range > 5:
                tick_spacing = 1.0
            else:
                tick_spacing = 0.5
            ax.set_yticks(np.arange(np.floor(ymin), np.ceil(ymax), tick_spacing))

    # Legend entries are collected explicitly so unlabeled helper artists
    # (the hurricane marker) never show up in the legend.
    handles = []
    labels = []

    surface_scatter = ax.scatter(
        surface_data["Activity_Start_Date_Time"],
        surface_data["Org_Result_Value"],
        color="#1f77b4",
        s=25,
        alpha=0.5,
        label="Surface",
        zorder=2,
    )
    handles.append(surface_scatter)
    labels.append("Surface")

    if not bottom_data.empty:
        bottom_scatter = ax.scatter(
            bottom_data["Activity_Start_Date_Time"],
            bottom_data["Org_Result_Value"],
            color="#7fbf7b",
            s=25,
            alpha=0.5,
            label="Bottom",
            zorder=2,
        )
        handles.append(bottom_scatter)
        labels.append("Bottom")

    # Mark Hurricane Michael only when it falls inside the plotted date range.
    hurricane_date = pd.Timestamp("2018-10-10")
    data_start = param_data["Activity_Start_Date_Time"].min()
    data_end = param_data["Activity_Start_Date_Time"].max()

    if data_start <= hurricane_date <= data_end:
        # axvline's ymin/ymax are fractions of the axes height, whereas the dot
        # and labels use data coordinates. Convert the same fraction to a data
        # value (geometrically on a log axis) so the dot sits on the line end.
        marker_fraction = 0.95
        axis_bottom, axis_top = ax.get_ylim()
        if log_scale:
            log_bottom = np.log10(axis_bottom)
            log_top = np.log10(axis_top)
            line_height = 10 ** (log_bottom + marker_fraction * (log_top - log_bottom))
        else:
            line_height = axis_bottom + marker_fraction * (axis_top - axis_bottom)

        ax.axvline(
            x=hurricane_date,
            color="gray",
            linestyle="-",
            alpha=0.6,
            linewidth=1,
            ymin=0,
            ymax=marker_fraction,
            zorder=1,
        )

        ax.scatter(
            [hurricane_date],
            [line_height],
            color="gray",
            s=25,
            alpha=0.8,
            zorder=2,
        )

        ax.annotate(
            "Oct 2018",
            xy=(hurricane_date, line_height),
            xytext=(5, 0),
            textcoords="offset points",
            ha="left",
            va="bottom",
            color="gray",
            fontsize=10,
            weight="bold",
        )

        ax.annotate(
            "Hurricane Michael",
            xy=(hurricane_date, line_height),
            xytext=(5, -12),
            textcoords="offset points",
            ha="left",
            va="bottom",
            color="gray",
            fontsize=10,
        )

    if thresh is not None:
        threshold_label = f"Threshold: {thresh} {unit}"
        threshold_line = ax.axhline(
            y=thresh,
            color="red",
            linestyle=":",
            alpha=0.9,
            linewidth=1.5,
            label=threshold_label,
            zorder=1,
        )
        handles.append(threshold_line)
        labels.append(threshold_label)

    # These two parameters are plotted without a legend.
    if parameter not in ["Depth, Secchi Disk Depth", "Temperature, Air"]:
        ax.legend(
            handles=handles,
            labels=labels,
            loc="upper right",
            frameon=True,
            ncol=1,
            bbox_to_anchor=(1.0, 1.0),
            handletextpad=0.5,
            fontsize=12,
        )

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("black")
    ax.spines["bottom"].set_linewidth(0.5)

    title = parameter
    if sector != "All":
        title += f" - {sector}"
    ax.set_title(title, fontsize=14)

    ax.set_ylabel(f"{unit}", fontsize=12)

    ax.grid(True, axis="both", alpha=0.15, linestyle="-", color="gray")

    ax.tick_params(axis="y", which="both", length=0)

    # One labelled tick per calendar year on the time axis.
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    fig.tight_layout()
    return (fig, param_data)
|
|
|
|
@timer(include_params=True)
def plot_grouped_bars(
    df: pd.DataFrame,
    parameter: str,
    year_range: tuple[int, int],
    group_by: str = "sector",
) -> tuple[Figure, pd.DataFrame]:
    """
    Create a grouped bar chart showing means by sector or year for a selected parameter.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing water quality measurements
    parameter : str
        Name of the parameter to plot
    year_range : tuple[int, int]
        Start and end years (both inclusive) to include in plot
    group_by : str
        What the x-axis groups are: "year" puts reporting years on the x-axis
        with one bar per sector; any other value (default "sector") puts
        sectors on the x-axis with one bar per year.

    Returns:
    --------
    tuple[Figure, pd.DataFrame]
        - Figure: Matplotlib figure containing the grouped bar chart
        - DataFrame: Contains the plotted data points with means and standard errors

    Raises:
    -------
    ValueError
        If there is no data for `parameter` inside `year_range`.
    """
    plot_df = df[
        (df["Org_Analyte_Name"] == parameter)
        & (df["Reporting_Year"] >= year_range[0])
        & (df["Reporting_Year"] <= year_range[1])
    ].copy()

    if plot_df.empty:
        raise ValueError(
            f"No data available for {parameter} between {year_range[0]}-{year_range[1]}"
        )

    # Mean and standard error for every (year, sector) combination present.
    means_df = (
        plot_df.groupby(["Reporting_Year", "Sector"], observed=True)["Org_Result_Value"]
        .agg(["mean", "sem"])
        .reset_index()
    )

    years = sorted(means_df["Reporting_Year"].unique())
    sectors = sorted(means_df["Sector"].unique())

    # "primary" categories become the coloured bar series (legend entries);
    # "secondary" categories become the groups along the x-axis.
    if group_by == "year":
        primary_categories = sectors
        secondary_categories = years
        group_column = "Reporting_Year"
        category_column = "Sector"
        x_label = "Reporting Year"
        legend_title = "Sector"
    else:
        primary_categories = years
        secondary_categories = sectors
        group_column = "Sector"
        category_column = "Reporting_Year"
        x_label = "Sector"
        legend_title = "Year"

    n_groups = len(primary_categories)

    colors = [
        "#E69F00",
        "#56B4E9",
        "#009E73",
        "#F0E442",
        "#0072B2",
        "#D55E00",
        "#CC79A7",
        "#999999",
        "#F5C710",
        "#93AA00",
        "#482677",
        "#DA5724",
        "#5082CF",
        "#CD9BCD",
        "#C1A43A",
    ]

    fig, ax = plt.subplots(figsize=(12, 6))

    # Each x-axis group is 0.8 units wide, shared by all bar series.
    bar_width = 0.8 / n_groups
    group_positions = np.arange(len(secondary_categories))
    group_centers = group_positions + (bar_width * (n_groups - 1)) / 2

    for i, category in enumerate(primary_categories):
        # Cycle through the palette so categories beyond its length are still
        # drawn instead of being silently dropped.
        color = colors[i % len(colors)]

        # Align this series to the x-axis groups; combinations with no data
        # become NaN (no bar) rather than shifting or mis-sizing the series.
        category_data = (
            means_df[means_df[category_column] == category]
            .set_index(group_column)
            .reindex(secondary_categories)
        )
        bar_positions = group_positions + i * bar_width
        bar_heights = category_data["mean"].to_numpy()

        ax.bar(
            bar_positions,
            bar_heights,
            bar_width,
            label=str(category),
            color=color,
            alpha=0.7,
            zorder=2,
        )

        ax.errorbar(
            bar_positions,
            bar_heights,
            yerr=category_data["sem"].to_numpy(),
            fmt="none",
            color="black",
            capsize=3,
            capthick=1,
            linewidth=1,
            alpha=0.5,
            zorder=3,
        )

    # The unit of the first row is shown in the title; a missing unit is
    # simply left out.
    unit = plot_df["Org_Result_Unit"].iloc[0]
    unit_suffix = f" {unit}" if pd.notna(unit) and unit else ""
    ax.set_xlabel(x_label)
    ax.set_title(f"{parameter} (Mean Annual{unit_suffix})")

    def wrap_labels(text, width=10):
        """Wrap text at specified width using textwrap."""
        text_str = str(text)
        if len(text_str) > width:
            return textwrap.fill(text_str, width=width)
        return text_str

    ax.set_xticks(group_centers)
    wrapped_labels = [wrap_labels(str(label)) for label in secondary_categories]
    ax.set_xticklabels(
        wrapped_labels,
        ha="center",
        va="top",
        rotation=0,
    )

    ax.tick_params(axis="x", length=0)

    # Footnote below the axes explaining the error bars.
    ax.text(
        0.99,
        -0.15,
        "Error bars represent ±1 standard error of the mean",
        ha="right",
        va="top",
        transform=ax.transAxes,
        fontsize=9,
        fontstyle="italic",
    )

    # Reserve the bottom 20% of the figure for the footnote.
    fig.tight_layout(rect=(0, 0.2, 1, 1))

    ax.grid(True, axis="y", alpha=0.2, linestyle="-", zorder=1)

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    ax.tick_params(axis="y", which="both", length=0)

    ax.legend(
        title=legend_title,
        bbox_to_anchor=(1.02, 1),
        loc="upper left",
        frameon=False,
        ncol=1,
        handletextpad=0.5,
        fontsize=9,
    )

    # Parameters drawn on a logarithmic y-axis with plain-number tick labels.
    if parameter in [
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    ]:
        ax.set_yscale("log")
        ax.yaxis.set_major_formatter(plt.ScalarFormatter())

    means_df.insert(0, "parameter", parameter)
    return fig, means_df
|
|
|
|
def plot_seasonal_line(
    df: pd.DataFrame,
    parameter: str,
    period: str = "quarterly",
    thresh: float | None = None,
    sector: str | None = None,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create a line chart showing seasonal trends for a parameter across all years.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing measurements
    parameter : str
        Name of the parameter to plot
    period : str
        'monthly' or 'quarterly' aggregation period (case-insensitive). Any
        value other than 'monthly' is aggregated by quarter.
    thresh : float | None
        Optional threshold value to display on plot
    sector : str | None
        Optional sector name to include in title

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the plot
        - DataFrame: Filtered dataframe containing the data used in the plot
        - DataFrame: Stats dataframe containing the mean, min, max, and overall average

    Raises:
    -------
    ValueError
        If `df` holds no rows for `parameter`.
    """
    param_data = df[df["Org_Analyte_Name"] == parameter].copy()

    if param_data.empty:
        raise ValueError(f"No data found for parameter: {parameter}")

    param_data["Month"] = param_data["Activity_Start_Date_Time"].dt.month
    param_data["Quarter"] = param_data["Activity_Start_Date_Time"].dt.quarter

    is_monthly = period.lower() == "monthly"
    if is_monthly:
        group_col = "Month"
        x_ticks = range(1, 13)
        x_label = "Month"
    else:
        group_col = "Quarter"
        x_ticks = range(1, 5)
        x_label = "Quarter"

    # Mean/min/max per month or quarter, pooled over every year in the data.
    stats_df = (
        param_data.groupby(group_col, observed=True)["Org_Result_Value"]
        .agg(["mean", "min", "max"])
        .reset_index()
    )

    # Overall mean of all measurements, repeated on every row.
    stats_df["overall_avg"] = param_data["Org_Result_Value"].mean()

    fig, ax = plt.subplots(figsize=(10, 6))

    # The unit of the first row decides the axis labelling below.
    unit = param_data["Org_Result_Unit"].iloc[0]

    # Parameters drawn on a logarithmic y-axis with plain-number tick labels.
    if parameter in [
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    ]:
        ax.set_yscale("log")
        ax.yaxis.set_major_formatter(plt.ScalarFormatter())

    # Each series is labelled directly on the plot instead of in a legend.
    mean_line = ax.plot(
        stats_df[group_col],
        stats_df["mean"],
        "b-",
        linewidth=2,
        marker="s",
        label="Mean",
        zorder=3,
    )[0]
    ax.annotate(
        "Mean",
        xy=(stats_df[group_col].iloc[0], stats_df["mean"].iloc[0]),
        xytext=(-5, 0),
        textcoords="offset points",
        ha="right",
        va="center",
        color=mean_line.get_color(),
        fontsize=9,
    )

    min_line = ax.plot(
        stats_df[group_col],
        stats_df["min"],
        "--",
        color="gray",
        linewidth=1,
        label="Min",
        zorder=2,
    )[0]
    ax.annotate(
        "Min",
        xy=(stats_df[group_col].iloc[-1], stats_df["min"].iloc[-1]),
        xytext=(5, 0),
        textcoords="offset points",
        va="center",
        color=min_line.get_color(),
        fontsize=9,
    )

    max_line = ax.plot(
        stats_df[group_col],
        stats_df["max"],
        "--",
        color="orange",
        linewidth=1,
        label="Max",
        zorder=2,
    )[0]
    ax.annotate(
        "Max",
        xy=(stats_df[group_col].iloc[-1], stats_df["max"].iloc[-1]),
        xytext=(5, 0),
        textcoords="offset points",
        va="center",
        color=max_line.get_color(),
        fontsize=9,
    )

    avg_value = stats_df["overall_avg"].iloc[0]
    ax.axhline(
        y=avg_value,
        color="blue",
        linestyle=":",
        alpha=0.5,
        linewidth=1,
        label="Average",
        zorder=1,
    )
    ax.annotate(
        "Average",
        xy=(stats_df[group_col].iloc[-1], avg_value),
        xytext=(27, -5),
        textcoords="offset points",
        va="top",
        ha="right",
        color="blue",
        alpha=0.5,
        fontsize=9,
    )

    # The threshold is the only legend entry. The line is drawn once and the
    # same artist is reused as the legend handle.
    if thresh is not None:
        threshold_label = f"Threshold: {thresh} {unit}"
        threshold_line = ax.axhline(
            y=thresh,
            color="red",
            linestyle=":",
            alpha=0.9,
            linewidth=1.5,
            label=threshold_label,
            zorder=1,
        )
        ax.legend(
            [threshold_line],
            [threshold_label],
            loc="upper right",
            frameon=False,
            handletextpad=0.5,
            fontsize=9,
        )

    ax.set_xticks(x_ticks)
    if period.lower() == "quarterly":
        # NOTE(review): quarters come from `.dt.quarter` (calendar quarters
        # 1-4) and are labelled Spring..Winter in that order; confirm this is
        # the intended season definition.
        season_labels = ["Spring", "Summer", "Fall", "Winter"]
        ax.set_xticklabels(season_labels)

    ax.tick_params(axis="x", which="both", length=0)
    ax.set_xlabel(x_label)

    if unit == "deg C":
        # Temperature: add a matching Fahrenheit axis on the right.

        def celsius_to_fahrenheit(temp_c):
            return (temp_c * 9 / 5) + 32

        y1_min, y1_max = ax.get_ylim()

        ax2 = ax.secondary_yaxis(
            "right",
            functions=(celsius_to_fahrenheit, lambda f: (f - 32) * 5 / 9),
        )

        ax2.set_ylim(celsius_to_fahrenheit(y1_min), celsius_to_fahrenheit(y1_max))

        # Put the Fahrenheit ticks at the same heights as the Celsius ticks.
        primary_ticks = ax.get_yticks()
        ax2.set_yticks([celsius_to_fahrenheit(t) for t in primary_ticks])

        ax.yaxis.set_major_formatter(lambda x, p: f"{x:.0f}°C")
        ax2.yaxis.set_major_formatter(lambda x, p: f"{x:.0f}°F")

        ax2.spines["right"].set_visible(False)
        ax2.tick_params(axis="y", which="both", length=0)

    elif unit == "ft":
        # Values in feet are labelled as depth, with a metres axis on the right.

        def feet_to_meters(feet):
            return feet * 0.3048

        ax2 = ax.secondary_yaxis(
            "right",
            functions=(feet_to_meters, lambda m: m / 0.3048),
        )
        ax2.set_ylabel("Depth (m)")
        ax.set_ylabel("Depth (ft)")

        ax2.spines["right"].set_visible(False)
        ax2.tick_params(axis="y", which="both", length=0)
    else:
        ax.set_ylabel(f"{unit}")

    # Title carries the span of years present in the data.
    start_year = param_data["Activity_Start_Date_Time"].dt.year.min()
    end_year = param_data["Activity_Start_Date_Time"].dt.year.max()
    year_range = (
        f" ({start_year}-{end_year})" if start_year != end_year else f" ({start_year})"
    )
    title = f"Seasonal {parameter} Trends{year_range}"
    if sector:
        title = f"{title} - {sector}"
    ax.set_title(title)

    ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    ax.tick_params(axis="y", which="both", length=0)

    # Leave room on the right for the end-of-line labels (and for the
    # secondary axis, which needs less because it replaces the label margin).
    if unit == "deg C":
        fig.tight_layout(rect=(0, 0, 0.95, 1))
    else:
        fig.tight_layout(rect=(0, 0, 0.9, 1))

    stats_df.insert(0, "parameter", parameter)
    return fig, param_data, stats_df
|
|
|
|
@timer(include_params=True)
def plot_sector_line_charts(
    df: pd.DataFrame,
    parameter: str,
    show_sem: bool = True,
    panel_chart: bool = False,
    color_scale: list[str] = COLOR_SCALE,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create a plot of mean annual parameter trends by sector.

    Rows whose ``Org_Analyte_Name`` equals ``parameter`` are grouped by
    ``Sector`` and ``Reporting_Year``; the mean and standard error of
    ``Org_Result_Value`` are drawn as one line (plus optional band) per sector.
    All sectors share the same y-limits. A fixed list of parameters is drawn
    on a log-scaled y-axis. For "Salinity" the "Freshwater Lakes" sector is
    excluded.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Must provide the columns ``Org_Analyte_Name``,
        ``Sector``, ``Reporting_Year``, ``Org_Result_Value`` and
        ``Org_Result_Unit``.
    parameter : str
        Name of the parameter to plot
    show_sem : bool, default=True
        Whether to show the standard error of the mean bands
    panel_chart : bool, default=False
        If True, creates a grid of individual sector charts instead of overlapping lines
    color_scale : list[str], default=COLOR_SCALE
        Colors assigned to the sectors in sorted order. The list is cycled when
        there are more sectors than colors; an empty list falls back to
        ``COLOR_SCALE``.

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the line chart(s)
        - DataFrame: Filtered dataframe containing the data used in the plot
        - DataFrame: Contains the plotted data points with means and standard errors
          (columns ``parameter``, ``Reporting_Year``, ``mean``, ``sem``, ``Sector``;
          empty when no row matches ``parameter``)
    """
    GREY30 = "#4d4d4d"
    GREY40 = "#666666"
    stat_columns = ["Reporting_Year", "mean", "sem", "Sector"]

    # Restrict to the requested analyte.
    param_data = df[df["Org_Analyte_Name"] == parameter].copy()
    if parameter == "Salinity":
        param_data = param_data[param_data["Sector"] != "Freshwater Lakes"]

    sectors = sorted(param_data["Sector"].unique())
    years = sorted(param_data["Reporting_Year"].unique())
    param_unit = param_data["Org_Result_Unit"].iloc[0] if not param_data.empty else ""

    # Per-sector table of annual mean and standard error.
    sector_data_dict = {}
    for sector in sectors:
        sector_data = (
            param_data[param_data["Sector"] == sector]
            .groupby("Reporting_Year", observed=True)["Org_Result_Value"]
            .agg(["mean", "sem"])
            .reset_index()
        )
        sector_data["Sector"] = sector
        sector_data_dict[sector] = sector_data

    use_log_scale = parameter in [
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    ]

    # Shared y-limits across all sectors. The SEM of a single observation is
    # NaN; treat it as a zero-width band so that the mean itself still counts
    # towards the limits instead of being dropped (and possibly clipped).
    y_min = float("inf")
    y_max = float("-inf")
    for data in sector_data_dict.values():
        if data.empty:
            continue
        half_width = data["sem"].fillna(0)
        y_min = min(y_min, (data["mean"] - half_width).min())
        y_max = max(y_max, (data["mean"] + half_width).max())

    # Limits are only applied when they are finite (there was something to
    # plot) and, on a log axis, strictly positive.
    limits_usable = bool(np.isfinite(y_min) and np.isfinite(y_max))

    if limits_usable and use_log_scale and y_min <= 0:
        # mean - sem can reach zero or below for skewed data, which a log axis
        # cannot show; fall back to the smallest positive mean.
        positive_means = [
            data.loc[data["mean"] > 0, "mean"].min()
            for data in sector_data_dict.values()
        ]
        positive_means = [value for value in positive_means if pd.notna(value)]
        if positive_means:
            y_min = min(positive_means)

    # Add padding around the data range.
    if limits_usable:
        if use_log_scale:
            y_min = y_min / 1.2
            y_max = y_max * 1.2
            limits_usable = bool(y_min > 0)
        else:
            y_range = y_max - y_min
            y_min = y_min - (y_range * 0.05)
            y_max = y_max + (y_range * 0.05)

    # Create the figure: a grid with at most three columns, or a single axes.
    if panel_chart:
        n_cols = max(1, min(3, len(sectors)))
        n_rows = max(1, (len(sectors) + n_cols - 1) // n_cols)
        fig = plt.figure(figsize=(5 * n_cols, 3 * n_rows))
    else:
        fig, main_ax = plt.subplots(figsize=(14, 4))

    def set_fixed_major_ticks(ax: plt.Axes, candidates) -> None:
        """Use the candidate ticks lying within the (slightly widened) y-range."""
        ticks = np.asarray(candidates)
        ticks = ticks[(ticks >= y_min * 0.9) & (ticks <= y_max * 1.1)]
        ax.yaxis.set_major_locator(plt.FixedLocator(ticks))
        ax.yaxis.set_minor_locator(plt.NullLocator())

    def plot_sector_on_axis(
        ax: plt.Axes,
        sector_data: pd.DataFrame,
        color: str,
        label=None,
    ) -> None:
        """Draw one sector's line (and SEM band) on ``ax`` and style the axes."""
        ax.plot(
            sector_data["Reporting_Year"],
            sector_data["mean"],
            "-o",
            color=color,
            label=label,
            markersize=4,
            linewidth=2,
        )

        if show_sem:
            ax.fill_between(
                sector_data["Reporting_Year"],
                sector_data["mean"] - sector_data["sem"],
                sector_data["mean"] + sector_data["sem"],
                color=color,
                alpha=0.15,
            )

        # Minimal styling: horizontal grid only, no top/right/left spines.
        ax.grid(True, axis="y", which="major", alpha=0.2, linestyle="--")
        ax.grid(True, axis="y", which="minor", alpha=0.1, linestyle="--")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["left"].set_visible(False)
        ax.spines["bottom"].set_color(GREY40)
        ax.tick_params(axis="both", which="both", length=0, colors=GREY40)
        ax.set_xticks(years)

        # Decide once (not on every tick) whether labels need a decimal place.
        min_value = min(sector_data["mean"].min(), y_min)
        needs_decimals = min_value < 1 or not all(
            val.is_integer() for val in sector_data["mean"]
        )

        if use_log_scale:
            ax.set_yscale("log")

            def format_func(x, _):
                if x == 0:
                    return "0"
                if needs_decimals:
                    return f"{x:.1f}"
                return f"{int(x)}"

            ax.yaxis.set_major_formatter(plt.FuncFormatter(format_func))

            # Without usable limits matplotlib's default log ticks are kept.
            if limits_usable:
                ax.set_ylim(y_min, y_max)

                range_ratio = y_max / y_min
                abs_min = min(abs(sector_data["mean"].min()), abs(y_min))
                abs_max = max(abs(sector_data["mean"].max()), abs(y_max))

                if parameter == "Total Phosphorus":
                    set_fixed_major_ticks(ax, [10, 13, 15, 17, 20, 30, 40, 50])
                elif abs_min >= 100:
                    set_fixed_major_ticks(ax, [100, 200, 300, 400, 500])
                elif abs_min >= 10 and abs_max <= 100:
                    set_fixed_major_ticks(
                        ax, [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
                    )
                elif range_ratio > 10:
                    # More than a decade: decade majors with 2x/5x minors.
                    ax.yaxis.set_major_locator(plt.LogLocator(base=10.0, numticks=5))
                    ax.yaxis.set_minor_locator(
                        plt.LogLocator(base=10.0, subs=(2, 5), numticks=5)
                    )
                    ax.yaxis.set_minor_formatter(plt.FuncFormatter(format_func))
                elif y_min < 1:
                    set_fixed_major_ticks(ax, [0.5, 1, 1.5, 2, 2.5, 3, 4, 5])
                else:
                    set_fixed_major_ticks(
                        ax,
                        np.arange(
                            np.floor(y_min),
                            np.ceil(y_max) + 1,
                            1 if y_max - y_min < 5 else 2,
                        ),
                    )

            ax.tick_params(axis="y", which="both", labelsize=9)
        else:
            if limits_usable:
                ax.set_ylim(y_min, y_max)

            def linear_format_func(x, _):
                if needs_decimals:
                    return f"{x:.1f}"
                return f"{int(x)}"

            ax.yaxis.set_major_formatter(plt.FuncFormatter(linear_format_func))

    # Cycle through the palette so sectors beyond its length are still drawn.
    palette = list(color_scale) or list(COLOR_SCALE)

    for i, sector in enumerate(sectors):
        sector_data = sector_data_dict[sector]
        if sector_data.empty:
            continue
        color = palette[i % len(palette)]

        if panel_chart:
            ax = fig.add_subplot(n_rows, n_cols, i + 1)
            plot_sector_on_axis(ax, sector_data, color)
            ax.set_title(sector, pad=10, fontsize=10, color=GREY30)

            if len(years) > 8:
                # Thin out the year labels so they do not overlap.
                n = len(years) // 8 + 1
                visible_ticks = years[::n]
                ax.set_xticks(visible_ticks)
                ax.set_xticklabels(visible_ticks, rotation=0, weight=500, color=GREY40)
                ax.tick_params(axis="x", which="major", length=4, colors=GREY40)
            else:
                ax.set_xticklabels(years, rotation=0, weight=500, color=GREY40)
                ax.tick_params(axis="x", which="major", length=0)
        else:
            plot_sector_on_axis(main_ax, sector_data, color, label=sector)

    # Titles, axis labels and legend.
    if panel_chart:
        title = f"{parameter}{' (' + param_unit + ')' if param_unit else ''}"
        fig.suptitle(title, fontsize=14, y=1.02, color=GREY30)
    else:
        main_ax.set_title(
            parameter, pad=10, fontsize=14, fontweight="normal", color=GREY30
        )
        main_ax.set_ylabel(param_unit, fontsize=12, color=GREY40)
        # Fix the tick positions first so the labels always match them, even
        # when no sector was drawn.
        main_ax.set_xticks(years)
        main_ax.set_xticklabels(years, weight=500, color=GREY40)
        main_ax.yaxis.label.set_color(GREY40)
        main_ax.legend(
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            borderaxespad=0.0,
            frameon=False,
            fontsize=9,
        )

        if use_log_scale:
            # Plain (non-scientific) numbers on the combined chart.
            main_ax.yaxis.set_major_formatter(plt.ScalarFormatter())
            main_ax.yaxis.get_major_formatter().set_scientific(False)

    fig.tight_layout()

    # Tabular form of what was plotted; pd.concat rejects an empty sequence.
    if sector_data_dict:
        plot_data = pd.concat(sector_data_dict.values(), ignore_index=True)
    else:
        plot_data = pd.DataFrame(columns=stat_columns)
    plot_data.insert(0, "parameter", parameter)

    return fig, param_data, plot_data
|
|
|
|
@timer(include_params=True)
def plot_sector_box_charts(
    df: pd.DataFrame,
    parameter: str,
    color_scale: list[str] = COLOR_SCALE,
    show_trend: bool = True,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create box plots showing the distribution of parameter values by sector and year,
    with optional trend lines and statistics.

    One subplot is drawn per sector, with one box per year. When ``show_trend``
    is set, a least-squares line is fitted to the annual means (against the
    year's position on the x-axis, not the year value) and annotated with
    R² and p. For "Salinity" the "Freshwater Lakes" sector is excluded.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Must provide the columns ``Org_Analyte_Name``,
        ``Sector``, ``Activity_Start_Date_Time`` (datetime) and
        ``Org_Result_Value``.
    parameter : str
        Parameter to plot
    color_scale : list[str]
        List of colors to use for sectors. Cycled when there are more sectors
        than colors; an empty list falls back to ``COLOR_SCALE``.
    show_trend : bool, default=True
        Whether to show trend lines and statistics. A trend is only drawn for
        sectors with at least two years of data.

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the box plots
        - DataFrame: Filtered dataframe containing the raw data used in the plot
        - DataFrame: Contains the plotted data points: mean, median, and quartiles
    """
    from scipy.stats import linregress

    GREY30 = "#4d4d4d"
    GREY40 = "#666666"
    stat_columns = [
        "Sector",
        "Reporting_Year",
        "mean",
        "median",
        "q1",
        "q3",
        "min",
        "max",
        "count",
    ]

    # Build the full row filter first and copy once, so the column assignment
    # below operates on an independent frame.
    keep = df["Org_Analyte_Name"] == parameter
    if parameter == "Salinity":
        keep = keep & (df["Sector"] != "Freshwater Lakes")
    param_data = df[keep].copy()

    # NOTE(review): this replaces any existing Reporting_Year with the calendar
    # year of the sample, whereas the sibling sector plots use the column as
    # provided -- confirm that this difference is intended.
    param_data["Reporting_Year"] = param_data["Activity_Start_Date_Time"].dt.year
    sectors = sorted(param_data["Sector"].unique())
    years = sorted(param_data["Reporting_Year"].unique())

    use_log_scale = parameter in [
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    ]

    # One row of height 2.5 per sector; never a zero-height figure.
    fig = plt.figure(figsize=(15, 2.5 * max(len(sectors), 1)))
    palette = list(color_scale) or list(COLOR_SCALE)

    stats_rows = []
    for idx, sector in enumerate(sectors):
        ax = fig.add_subplot(len(sectors), 1, idx + 1)
        sector_color = palette[idx % len(palette)]
        sector_data = param_data[param_data["Sector"] == sector]

        # Slice the sector's values per year once; reused for the boxes, the
        # trend line and the summary table.
        values_by_year = [
            sector_data.loc[sector_data["Reporting_Year"] == year, "Org_Result_Value"]
            for year in years
        ]

        # Tick labels are assigned explicitly further down, so none are passed
        # here (the ``labels`` keyword is deprecated in recent matplotlib).
        ax.boxplot(
            [values.dropna() for values in values_by_year],
            patch_artist=True,
            medianprops=dict(color="black"),
            flierprops=dict(
                marker="o",
                markerfacecolor=sector_color,
                alpha=0.5,
                markersize=4,
            ),
            boxprops=dict(facecolor=sector_color, alpha=0.6),
            widths=0.6,
            positions=range(len(years)),
        )

        if show_trend:
            annual_means = [values.mean() for values in values_by_year]

            # Years without data for this sector have a NaN mean; skip them.
            valid_points = [
                (position, mean)
                for position, mean in enumerate(annual_means)
                if not np.isnan(mean)
            ]
            # A line needs at least two points; with fewer the regression
            # statistics are undefined.
            if len(valid_points) >= 2:
                x_valid, y_valid = zip(*valid_points)

                slope, intercept, r_value, p_value, _std_err = linregress(
                    x_valid, y_valid
                )

                line_x = np.array(x_valid)
                line_y = slope * line_x + intercept
                ax.plot(line_x, line_y, "--", color="red", alpha=0.7, linewidth=1.5)

                stats_text = f"R² = {r_value**2:.3f}\np = {p_value:.3f}"
                ax.text(
                    0.02,
                    0.98,
                    stats_text,
                    transform=ax.transAxes,
                    verticalalignment="top",
                    fontsize=8,
                    bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
                )

        # Same x-range on every subplot, including years without data.
        ax.set_xlim(-0.5, len(years) - 0.5)

        ax.set_title(sector, pad=10, fontsize=10, color=GREY30)

        if use_log_scale:
            ax.set_yscale("log")

        ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["left"].set_visible(False)
        ax.spines["bottom"].set_color(GREY40)
        ax.spines["bottom"].set_linewidth(0.5)

        ax.tick_params(axis="both", which="both", length=0, colors=GREY40)

        ax.set_xticks(range(len(years)))
        ax.set_xticklabels(years, ha="center", weight=500, color=GREY40)

        # Summary statistics for every sector/year that has at least one row.
        # "count" is the number of rows, including missing values.
        for year, year_data in zip(years, values_by_year):
            if year_data.empty:
                continue
            stats_rows.append(
                {
                    "Sector": sector,
                    "Reporting_Year": year,
                    "mean": year_data.mean(),
                    "median": year_data.median(),
                    "q1": year_data.quantile(0.25),
                    "q3": year_data.quantile(0.75),
                    "min": year_data.min(),
                    "max": year_data.max(),
                    "count": len(year_data),
                }
            )

    fig.suptitle(
        f"{parameter} Distribution by Sector", fontsize=14, y=1.02, color=GREY30
    )

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.4)

    # Explicit columns keep the schema stable when there are no rows.
    stats_df = pd.DataFrame(stats_rows, columns=stat_columns)
    stats_df.insert(0, "parameter", parameter)

    return fig, param_data, stats_df
|
|
|
|
@timer(include_params=True)
def plot_sector_heatmap(
    df: pd.DataFrame,
    parameter: str,
    show_values: bool = False,
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
    """
    Create a heatmap showing annual means by sector and year.

    Rows whose ``Org_Analyte_Name`` equals ``parameter`` are averaged per
    ``Sector`` and ``Reporting_Year`` and shown as a sector-by-year grid with a
    horizontal colorbar. For "Salinity" the "Freshwater Lakes" sector is
    excluded.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe. Must provide the columns ``Org_Analyte_Name``,
        ``Sector``, ``Reporting_Year``, ``Org_Result_Value`` and
        ``Org_Result_Unit``.
    parameter : str
        Name of the parameter to plot
    show_values : bool, default=False
        Whether to display mean values inside each cell

    Returns:
    --------
    tuple[Figure, pd.DataFrame, pd.DataFrame]
        - Figure: Matplotlib figure containing the heatmap
        - DataFrame: Filtered dataframe containing the raw data used in the plot
        - DataFrame: Contains the plotted data points: mean values for each sector and year
    """
    param_data = df[df["Org_Analyte_Name"] == parameter].copy()

    # Same sector spelling as the sibling line and box charts.
    # NOTE(review): the previous literal here was "Fresh Water Lakes" --
    # confirm against the data which spelling actually occurs.
    if parameter == "Salinity":
        param_data = param_data[param_data["Sector"] != "Freshwater Lakes"]

    # Sector x year table of mean values.
    plot_data = (
        param_data.groupby(["Reporting_Year", "Sector"], observed=True)[
            "Org_Result_Value"
        ]
        .mean()
        .reset_index()
        .pivot(index="Sector", columns="Reporting_Year", values="Org_Result_Value")
    )
    n_sectors, n_years = plot_data.shape
    has_cells = n_sectors > 0 and n_years > 0

    # 0.8 inch per sector row; keep the height positive when nothing matches.
    fig, ax = plt.subplots(figsize=(12, max(n_sectors, 1) * 0.8))

    im = ax.imshow(plot_data, aspect="auto", cmap="YlOrRd") if has_cells else None

    # One labelled tick per cell row/column.
    ax.set_xticks(np.arange(n_years))
    ax.set_yticks(np.arange(n_sectors))
    ax.set_xticklabels(plot_data.columns)
    ax.set_yticklabels(plot_data.index)

    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    ax.tick_params(axis="both", which="both", length=0)

    # Minor ticks sit on the cell borders; a white grid on them separates cells.
    ax.set_xticks(np.arange(n_years + 1) - 0.5, minor=True)
    ax.set_yticks(np.arange(n_sectors + 1) - 0.5, minor=True)
    ax.grid(which="minor", color="w", linestyle="-", linewidth=2)

    plt.setp(ax.get_xticklabels(), rotation=0)

    # Annotate every cell that has a value.
    if show_values:
        for (row, col), value in np.ndenumerate(plot_data.to_numpy()):
            if not pd.isna(value):
                ax.text(
                    col, row, f"{value:.1f}", ha="center", va="center", color="black"
                )

    # The colorbar needs an image to describe.
    if im is not None:
        cbar = fig.colorbar(im, ax=ax, orientation="horizontal", pad=0.1)
        unit = param_data["Org_Result_Unit"].iloc[0] if not param_data.empty else ""
        cbar.ax.set_xlabel(f"Mean ({unit})")
        cbar.outline.set_visible(False)

    ax.set_title(parameter)

    fig.tight_layout()

    # Flatten the pivot for export: Sector becomes a regular column.
    plot_data = plot_data.reset_index()
    plot_data.insert(0, "parameter", parameter)

    return fig, param_data, plot_data
|
|