| from typing import Literal |
|
|
| import altair as alt |
| import matplotlib.pyplot as plt |
| import numpy as np |
| import pandas as pd |
| import plotly.graph_objects as go |
| import seaborn as sns |
| import streamlit as st |
| from matplotlib.figure import Figure |
|
|
| from utils.data_loading import timer |
|
|
|
|
@st.cache_data
@timer(include_params=True)
def plot_parameter_correlations(
    df: pd.DataFrame,
    analyte_names: list[str],
    subset_by: str,
    subset: str,
    filter_by: str,
    threshold: float = 0.2,
    corr_method: Literal["pearson", "kendall", "spearman"] = "kendall",
) -> tuple[Figure, pd.DataFrame]:
    """
    Create a lower-triangle correlation heatmap of water quality parameters,
    with a footnote listing parameters dropped for low data completeness.

    Parameters
    ----------
    df : pd.DataFrame
        Input measurements. The function reads these columns:
        - Org_Analyte_Name: Name of the analyte
        - Org_Result_Value: Measurement value
        - Activity_Start_Date_Time: Timestamp of measurement
        - Reporting_Year: Year of measurement (used for the subtitle only)

    analyte_names : list[str]
        Analyte names to include in the correlation analysis.

    subset_by : str
        Label shown in the title before the subset value
        (e.g., "Sector", "Waterbody_Class"). Not used to filter ``df``.

    subset : str
        Subset value shown in the title. Not used to filter ``df``.

    filter_by : str
        Sample position label shown in the subtitle; "All" is displayed as
        "Surface and Bottom". Not used to filter ``df``.

    threshold : float, default=0.2
        Minimum data completeness (0-1). Completeness of a parameter is the
        fraction of pivoted timestamps that have a value for it. Parameters
        below the threshold are excluded from the correlation and listed in
        the footnote.

    corr_method : {"pearson", "kendall", "spearman"}, default="kendall"
        Correlation method passed to ``DataFrame.corr``.

    Returns
    -------
    tuple[Figure, pd.DataFrame]
        - Figure: Matplotlib figure with a title block, the annotated heatmap
          and (when parameters were excluded) a footnote.
        - DataFrame: The pivot table (timestamps x parameters, abbreviated
          column names) restricted to the parameters that met the threshold.

    Notes
    -----
    - Column names are abbreviated for display (e.g., "DO" for
      "Dissolved Oxygen"); the returned DataFrame uses the same abbreviations.
    - The upper triangle (including the diagonal) of the matrix is masked.
    - Colors use the RdBu_r colormap centered at 0 over [-1, 1].
    - ``n`` in the subtitle is ``len(df)``, i.e. every row passed in.
    - Results are cached through ``st.cache_data``.
    """
    GREY30 = "#4d4d4d"
    GREY40 = "#666666"

    name_mapping = {
        "Depth, Secchi Disk Depth": "Secchi Depth",
        "Dissolved Oxygen": "DO",
        "Fecal Coliform (MPN)": "Fecal Coliform",
        "Total Nitrogen": "TN",
        "Total Phosphorus": "TP",
    }

    # One row per timestamp, one column per requested analyte. Values that
    # share a timestamp and analyte are combined by pivot_table's default
    # aggregation.
    pivot_df = df[df["Org_Analyte_Name"].isin(analyte_names)].pivot_table(
        index="Activity_Start_Date_Time",
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=False,
    )
    pivot_df = pivot_df.rename(columns=name_mapping)

    # Completeness = share of timestamps with a non-missing value per column.
    completeness = pivot_df.notna().mean()
    valid_params = completeness[completeness >= threshold].index
    excluded_params = completeness[completeness < threshold]

    pivot_df = pivot_df[valid_params]
    corr = pivot_df.corr(method=corr_method)

    n_samples = len(df)

    fig = plt.figure(figsize=(3, 3.5))

    # Three stacked rows: title, heatmap, footnote.
    gs = fig.add_gridspec(
        3,
        1,
        height_ratios=[1, 4, 1.5],
        hspace=0.4,
    )
    title_ax = fig.add_subplot(gs[0])
    heatmap_ax = fig.add_subplot(gs[1])
    footnote_ax = fig.add_subplot(gs[2])

    # np.triu keeps the diagonal, so only the strict lower triangle is drawn.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    heatmap = sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        cmap="RdBu_r",
        center=0,
        vmin=-1,
        vmax=1,
        ax=heatmap_ax,
        yticklabels=1,
        cbar=True,
        xticklabels=1,
        annot_kws={"size": 5},
        fmt=".2f",
    )

    for spine in heatmap_ax.spines.values():
        spine.set_visible(False)

    heatmap_ax.set_xticklabels(
        heatmap_ax.get_xticklabels(),
        rotation=45,
        ha="right",
        rotation_mode="anchor",
        color=GREY40,
    )

    for axis in ("x", "y"):
        heatmap_ax.tick_params(
            axis=axis,
            pad=5,
            labelsize=5,
            length=0,
            colors=GREY40,
        )

    # The colorbar is the last axes added to the figure. Pin its tick
    # positions before assigning fixed-width text labels to them.
    colorbar = heatmap.figure.axes[-1]
    ticks = colorbar.get_yticks()
    colorbar.set_yticks(ticks)
    colorbar.set_yticklabels(
        [f"{x:>8.2f}" for x in ticks],
        size=5,
        color=GREY40,
    )

    heatmap_ax.set_yticklabels(
        heatmap_ax.get_yticklabels(),
        rotation=0,
        color=GREY40,
    )

    heatmap_ax.set_xlabel("")
    heatmap_ax.set_ylabel("")

    # The footnote axes is only a text container.
    footnote_ax.set_frame_on(False)
    footnote_ax.set_xticks([])
    footnote_ax.set_yticks([])

    # The footnote text only exists when something was excluded, so the text
    # artist is created inside the same guard.
    if not excluded_params.empty:
        footnote_lines = [
            f"Excluded parameters (<{threshold:.0%} data completeness):"
        ]
        footnote_lines.extend(
            f" - {param}: {completeness_val:.1%} complete"
            for param, completeness_val in excluded_params.items()
        )
        footnote_ax.text(
            0.01,
            0.40,
            "\n".join(footnote_lines),
            ha="left",
            va="center",
            fontsize=5,
            fontstyle="italic",
            transform=footnote_ax.transAxes,
            color=GREY40,
            bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
        )

    # The title axes is only a text container as well.
    title_ax.set_frame_on(False)
    title_ax.set_xticks([])
    title_ax.set_yticks([])

    display_filter = "Surface and Bottom" if filter_by == "All" else filter_by

    # Name the year only when the data covers exactly one reporting year.
    year_info = (
        f"Reporting Year {df['Reporting_Year'].iloc[0]}"
        if len(df["Reporting_Year"].unique()) == 1
        else "All Years"
    )

    # Title texts are positioned in figure coordinates, not axes coordinates.
    title_ax.text(
        0.45,
        0.8,
        f"{subset_by}: {subset}",
        ha="center",
        va="center",
        fontsize=8,
        fontweight="bold",
        transform=fig.transFigure,
        color=GREY30,
    )
    title_ax.text(
        0.45,
        0.75,
        f"{display_filter}, {year_info} (n={n_samples:,})",
        ha="center",
        va="bottom",
        fontsize=6,
        fontstyle="italic",
        transform=fig.transFigure,
        color=GREY40,
    )

    # NOTE(review): the figure is rendered once and its tight bounding box is
    # computed, but the box itself is not used afterwards; the sequence is
    # kept as-is so the layout steps run in the same order as before.
    fig.canvas.draw()
    renderer = fig.canvas.get_renderer()
    fig.get_tightbbox(renderer)

    fig.subplots_adjust(left=0.15, right=0.95, bottom=0.02, top=0.85, hspace=0.4)

    return fig, pivot_df
|
|
|
|
def plot_np_ratios(df: pd.DataFrame) -> Figure:
    """
    Create a visualization of N:P ratios over time and their distribution.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing nutrient measurements. The function reads
        the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time and Sector, and uses the rows whose analyte
        is "Total Nitrogen" or "Total Phosphorus".

    Returns:
    --------
    Figure
        Matplotlib figure with two stacked axes: a scatter plot of the ratio
        over time colored by sector, and a histogram of the ratio. Both mark
        the value 16 with a dashed red line labelled "Redfield Ratio (16:1)".
    """
    GREY30 = "#4d4d4d"
    GREY40 = "#666666"

    # One row per (timestamp, sector) with a column for each nutrient.
    nutrients_df = (
        df[df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])]
        .pivot_table(
            index=["Activity_Start_Date_Time", "Sector"],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
    )

    # A phosphorus value of zero makes the division produce +/-inf. Those are
    # not removed by dropna(), cannot be binned by the histogram and would be
    # counted in n, so they are treated as missing.
    nutrients_df["N:P Ratio"] = (
        nutrients_df["Total Nitrogen"] / nutrients_df["Total Phosphorus"]
    ).replace([np.inf, -np.inf], np.nan)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Top panel: ratio over time, one color per sector.
    sns.scatterplot(
        data=nutrients_df,
        x="Activity_Start_Date_Time",
        y="N:P Ratio",
        hue="Sector",
        ax=ax1,
        alpha=0.6,
        s=20,
    )
    ax1.axhline(
        y=16,
        color="red",
        linestyle="--",
        alpha=0.7,
        linewidth=1.5,
        label="Redfield Ratio (16:1)",
    )

    # Shared minimal styling: only a thin bottom spine, no tick marks and
    # faint horizontal grid lines.
    for ax in [ax1, ax2]:
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["bottom"].set_color(GREY40)
        ax.spines["bottom"].set_linewidth(0.5)
        ax.spines["left"].set_visible(False)
        ax.tick_params(axis="both", which="both", length=0, colors=GREY40)
        ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")

    ax1.set_ylabel("N:P Ratio", color=GREY40, fontsize=10)
    ax1.set_xlabel("Date", color=GREY40, fontsize=10)
    ax1.set_title("N:P Ratio Over Time", color=GREY30, fontsize=10, pad=10)

    # Legend sits outside the axes on the right; room for it is reserved by
    # the subplots_adjust call at the end.
    legend = ax1.legend(
        bbox_to_anchor=(1.05, 1),
        loc="upper left",
        frameon=False,
        title="Sector",
    )
    legend.get_title().set_color(GREY40)
    for text in legend.get_texts():
        text.set_color(GREY40)

    # Bottom panel: distribution of the finite ratios.
    sns.histplot(
        x=nutrients_df["N:P Ratio"].dropna(),
        ax=ax2,
        alpha=0.6,
    )
    ax2.axvline(
        x=16,
        color="red",
        linestyle="--",
        alpha=0.7,
        linewidth=1.5,
        label="Redfield Ratio (16:1)",
    )

    ax2.set_xlabel("N:P Ratio", color=GREY40, fontsize=10)
    ax2.set_ylabel("Count", color=GREY40, fontsize=10)
    ax2.set_title("Distribution of N:P Ratios", color=GREY30, fontsize=10, pad=10)
    ax2.legend(frameon=False)

    # Sample-size annotation counts only rows with a usable ratio.
    n_samples = len(nutrients_df.dropna(subset=["N:P Ratio"]))
    stats_text = f"n = {n_samples:,}"
    ax2.text(
        0.02,
        0.98,
        stats_text,
        transform=ax2.transAxes,
        verticalalignment="top",
        fontsize=8,
        bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
        color=GREY40,
    )

    # Lay out this figure explicitly instead of relying on pyplot's notion of
    # the "current" figure.
    fig.tight_layout()
    fig.subplots_adjust(right=0.85)

    return fig
|
|
|
|
def altair_plot_np_ratios(df: pd.DataFrame) -> alt.VConcatChart:
    """
    Build an interactive Altair view of N:P ratios over time and their
    distribution.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing nutrient measurements. The function reads
        the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time and Sector, and uses the rows whose analyte
        is "Total Nitrogen" or "Total Phosphorus".

    Returns:
    --------
    alt.VConcatChart
        Two vertically stacked panels with independent y scales: a scatter of
        the ratio over time colored by sector, and a histogram of the ratio.
        Each panel has a red rule at 16.
    """
    # The colon in the column name must be escaped so Altair does not read it
    # as the start of a type suffix.
    ratio_field = r"N\:P Ratio:Q"
    date_field = "Activity_Start_Date_Time:T"
    panel_width, panel_height = 600, 300

    nutrient_rows = df[
        df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])
    ]
    ratios = nutrient_rows.pivot_table(
        index=["Activity_Start_Date_Time", "Sector"],
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=True,
    ).reset_index()
    ratios["N:P Ratio"] = ratios["Total Nitrogen"] / ratios["Total Phosphorus"]

    # Panel 1: ratio over time.
    hover_fields = [
        alt.Tooltip(date_field, title="Date"),
        alt.Tooltip(ratio_field, format=".0f", title="N:P Ratio"),
        alt.Tooltip("Sector:N", title="Sector"),
    ]
    points = alt.Chart(ratios).mark_circle(size=60)
    points = points.encode(
        x=alt.X(
            date_field,
            axis=alt.Axis(format="%Y", tickCount="year"),
            title="Date",
        ),
        y=alt.Y(ratio_field, title="N:P Ratio"),
        color="Sector:N",
        tooltip=hover_fields,
    )
    points = points.properties(
        title="N:P Ratio Over Time", width=panel_width, height=panel_height
    ).interactive()

    horizontal_rule = (
        alt.Chart(pd.DataFrame({"y": [16]})).mark_rule(color="red").encode(y="y:Q")
    )

    # Panel 2: distribution of the ratio.
    bars = alt.Chart(ratios).mark_bar()
    bars = bars.encode(
        x=alt.X(ratio_field, bin=alt.Bin(maxbins=30), title="N:P Ratio"),
        y="count()",
        tooltip=["count()"],
    )
    bars = bars.properties(
        title="Distribution of N:P Ratios", width=panel_width, height=panel_height
    ).interactive()

    vertical_rule = (
        alt.Chart(pd.DataFrame({"x": [16]})).mark_rule(color="red").encode(x="x:Q")
    )

    time_panel = points + horizontal_rule
    histogram_panel = bars + vertical_rule

    # Ratio values and bin counts live on very different scales, so each
    # panel keeps its own y axis.
    return alt.vconcat(time_panel, histogram_panel).resolve_scale(y="independent")
|
|
|
|
def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
    """
    Create a scatter plot of DO vs temperature with regression line using seaborn.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The
        function reads the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time, Station_Number and Sample_Position, and
        uses the rows whose analyte is "Dissolved Oxygen" or
        "Temperature, Water".

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot: one scatter and fitted line
        per sample position ("Surface", "Bottom") plus a dashed reference
        line at 4.8 mg/L.
    """
    temperature_col = "Temperature, Water"
    oxygen_col = "Dissolved Oxygen"
    threshold_color = "#FF8C00"

    # Pair the two analytes per (timestamp, station, position) and keep only
    # the rows where both were measured.
    paired_rows = df[df["Org_Analyte_Name"].isin([oxygen_col, temperature_col])]
    paired = paired_rows.pivot_table(
        index=["Activity_Start_Date_Time", "Station_Number", "Sample_Position"],
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=True,
    ).reset_index()
    paired = paired.dropna(subset=[oxygen_col, temperature_col])

    position_colors = {"Surface": "#5AA4D8", "Bottom": "#1B4B8A"}

    grid = sns.lmplot(
        data=paired,
        x=temperature_col,
        y=oxygen_col,
        hue="Sample_Position",
        hue_order=["Bottom", "Surface"],
        palette=position_colors,
        scatter_kws={"alpha": 0.5, "zorder": 2, "s": 20},
        line_kws={"zorder": 3, "linewidth": 1},
        height=8,
        aspect=1.5,
        legend=False,
    )
    axes = grid.axes[0, 0]

    # Reference line sits beneath the points (zorder 1); its label is
    # anchored at the current left x-limit, just above the line.
    axes.axhline(
        y=4.8,
        color=threshold_color,
        linestyle="--",
        alpha=0.9,
        zorder=1,
        linewidth=1,
    )
    left_edge = axes.get_xlim()[0]
    axes.text(
        left_edge,
        4.9,
        " 4.8 mg/L DO threshold",
        ha="left",
        va="bottom",
        color=threshold_color,
        alpha=0.9,
    )

    # Keep only a thin black bottom spine.
    for side in ("top", "right", "left"):
        axes.spines[side].set_visible(False)
    bottom_spine = axes.spines["bottom"]
    bottom_spine.set_color("black")
    bottom_spine.set_linewidth(0.5)

    grid.set_axis_labels("Water Temperature (°C)", "Dissolved Oxygen (mg/L)")
    axes.set_title("Dissolved Oxygen vs Water Temperature", pad=20, fontsize=16)

    # Legend entries are listed in the reverse of the plotting order.
    legend_handles, legend_labels = axes.get_legend_handles_labels()
    axes.legend(
        list(reversed(legend_handles)),
        list(reversed(legend_labels)),
        bbox_to_anchor=(1.0, 1.0),
        loc="upper right",
        frameon=False,
        handletextpad=0.5,
    )

    axes.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")
    axes.tick_params(axis="y", which="both", length=0)

    # y-range: start a little below the smaller of the lowest reading and the
    # threshold (never below 0) and end 10% above the highest reading, with a
    # tick every 2 units.
    oxygen = paired[oxygen_col]
    lowest = min(oxygen.min(), 4.8)
    y_lower = max(int(lowest * 0.9) - 1, 0)
    y_upper = oxygen.max() * 1.1
    axes.set_ylim(y_lower, y_upper)
    axes.set_yticks(np.arange(y_lower, y_upper, 2))

    return grid.figure
|
|
|
|
def plotly_plot_do_temp_relationship(df: pd.DataFrame) -> go.Figure:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Plotly.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The
        function reads the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time, Station_Number, Sample_Position and Sector,
        and uses the rows whose analyte is "Dissolved Oxygen" or
        "Temperature, Water".

    Returns:
    --------
    go.Figure
        Plotly figure object with, for each sample position ("Surface",
        "Bottom") that has data, a marker trace and a dashed linear trend
        line, plus a dashed horizontal line at 4.8 mg/L. A position with no
        paired measurements is omitted; the trend line is omitted when all
        of a position's temperatures are identical.
    """
    # Pair the two analytes per (timestamp, station, position, sector) and
    # keep only the rows where both were measured.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sample_Position",
                "Sector",
            ],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
    )

    fig = go.Figure()

    colors = {"Surface": "#8da0cb", "Bottom": "#fc8d62"}

    for position in ["Surface", "Bottom"]:
        pos_data = do_temp_data[do_temp_data["Sample_Position"] == position]

        # The input may contain a single sample position only; np.polyfit
        # raises on an empty vector, so skip positions without data.
        if pos_data.empty:
            continue

        temperature = pos_data["Temperature, Water"]
        dissolved_oxygen = pos_data["Dissolved Oxygen"]

        fig.add_trace(
            go.Scatter(
                x=temperature,
                y=dissolved_oxygen,
                mode="markers",
                name=position,
                marker=dict(color=colors[position], size=8, opacity=0.6),
                hovertemplate=(
                    "Temperature: %{x:.1f}°C<br>"
                    "DO: %{y:.1f} mg/L<br>"
                    "Position: " + position + "<br>"
                    "Station: %{customdata[0]}<br>"
                    "Sector: %{customdata[1]}<br>"
                    "<extra></extra>"
                ),
                customdata=pos_data[["Station_Number", "Sector"]],
            )
        )

        # A straight-line fit needs at least two distinct temperatures;
        # otherwise the fit is degenerate and the line has zero length.
        if temperature.min() == temperature.max():
            continue

        # Least-squares line, drawn only across the observed temperature
        # range of this position.
        z = np.polyfit(temperature, dissolved_oxygen, 1)
        p = np.poly1d(z)
        x_range = np.linspace(temperature.min(), temperature.max(), 100)

        fig.add_trace(
            go.Scatter(
                x=x_range,
                y=p(x_range),
                mode="lines",
                line=dict(color=colors[position], dash="dash"),
                name=f"{position} Trend",
                hovertemplate=None,
                hoverinfo="skip",
                showlegend=False,
            )
        )

    # Reference line with its label anchored at the left edge of the plot.
    fig.add_hline(
        y=4.8,
        line=dict(color="#FF8C00", width=1, dash="dash"),
        opacity=0.5,
        annotation_text="4.8 mg/L DO threshold",
        annotation_position="left",
        annotation=dict(
            font=dict(color="#FF8C00", size=12),
            xanchor="left",
            yanchor="bottom",
            opacity=0.8,
        ),
    )

    fig.update_layout(
        title=dict(
            text="Dissolved Oxygen vs Water Temperature",
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
            font=dict(size=16),
        ),
        xaxis_title="Water Temperature (°C)",
        yaxis_title="Dissolved Oxygen (mg/L)",
        legend_title="Sample Position",
        # Legend is placed just outside the plotting area on the right.
        legend=dict(
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.05,
        ),
        template="plotly_white",
        width=800,
        height=600,
        showlegend=True,
    )

    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")

    return fig
|
|
|
|
def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Altair.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The
        function reads the columns Org_Analyte_Name, Org_Result_Value,
        Activity_Start_Date_Time, Station_Number, Sample_Position and Sector,
        and uses the rows whose analyte is "Dissolved Oxygen" or
        "Temperature, Water".

    Returns:
    --------
    alt.LayerChart
        Layered Altair chart: scatter points, one regression line per sample
        position, and a dashed threshold rule with its text label.
    """
    # NOTE(review): this chart previously drew the threshold at 5 mg/L while
    # the matplotlib and Plotly versions of the same plot in this file use
    # 4.8 mg/L; it is aligned with them here. Confirm 4.8 is the intended
    # value.
    do_threshold = 4.8
    threshold_label_y = 4.9

    # Pair the two analytes per (timestamp, station, position, sector) and
    # keep only the rows where both were measured.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sample_Position",
                "Sector",
            ],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
    )

    # Same position-to-color mapping for the points and the regression lines.
    position_scale = alt.Scale(
        domain=["Surface", "Bottom"],
        range=["#8da0cb", "#fc8d62"],
    )

    scatter = (
        alt.Chart(do_temp_data)
        .mark_circle(size=60, opacity=0.6)
        .encode(
            x=alt.X(
                "Temperature, Water:Q",
                title="Water Temperature (°C)",
                scale=alt.Scale(zero=False),
            ),
            y=alt.Y(
                "Dissolved Oxygen:Q",
                title="Dissolved Oxygen (mg/L)",
                scale=alt.Scale(zero=False),
            ),
            color=alt.Color(
                "Sample_Position:N",
                scale=position_scale,
                legend=alt.Legend(title="Sample Position"),
            ),
            tooltip=[
                alt.Tooltip("Temperature, Water:Q", title="Temperature", format=".1f"),
                alt.Tooltip("Dissolved Oxygen:Q", title="DO", format=".1f"),
                alt.Tooltip("Sample_Position:N", title="Position"),
                alt.Tooltip("Sector:N", title="Sector"),
                alt.Tooltip("Station_Number:N", title="Station"),
            ],
        )
    )

    # One fitted line per sample position, derived from the scatter layer.
    regression = (
        scatter.transform_regression(
            "Temperature, Water", "Dissolved Oxygen", groupby=["Sample_Position"]
        )
        .mark_line(size=2)
        .encode(color=alt.Color("Sample_Position:N", scale=position_scale))
    )

    threshold_line = (
        alt.Chart(pd.DataFrame({"y": [do_threshold]}))
        .mark_rule(strokeDash=[4, 4], color="red", opacity=0.5)
        .encode(y="y:Q")
    )

    # Label is placed at the lowest observed temperature, just above the rule.
    threshold_label = (
        alt.Chart(
            pd.DataFrame(
                {
                    "x": [do_temp_data["Temperature, Water"].min()],
                    "y": [threshold_label_y],
                }
            )
        )
        .mark_text(
            align="left",
            baseline="bottom",
            color="red",
            opacity=0.5,
            text=" 4.8 mg/L DO threshold",
        )
        .encode(x="x:Q", y="y:Q")
    )

    final_chart = (
        alt.layer(scatter, regression, threshold_line, threshold_label)
        .properties(
            width=800,
            height=750,
        )
        .configure_axis(grid=True, gridOpacity=0.3)
        .interactive()
    )

    return final_chart
|
|