waterdb

Sleeping

App Files Files Community

github-actions[bot] committed on Nov 14, 2024

Commit

5d4e96b

1 Parent(s): 948797f

Fresh start without shapefiles

Browse files

Files changed (14) hide show

.github/workflows/deploy-to-huggingface.yml +16 -3
.gitignore +4 -0
.python-version +1 -0
README.md +10 -0
analysis.py +1234 -0
app.py +1070 -0
data/SAB/SAB.cpg +3 -0
data/SAB/SAB.prj +3 -0
data/SAB/SAB.qmd +27 -0
main.py +232 -0
pyproject.toml +32 -0
requirements.txt +14 -0
tests/test_main.py +116 -0
uv.lock +0 -0

.github/workflows/deploy-to-huggingface.yml CHANGED Viewed

@@ -25,19 +25,32 @@ jobs:
           # Clone the HF space
           git clone https://philmaxwell:$HF_TOKEN@huggingface.co/spaces/philmaxwell/sabw-wq-data hf_space
-          # Copy files, excluding large files and unnecessary directories
           rsync -av \
             --exclude 'hf_space' \
             --exclude '.git' \
             --exclude '*.ipynb' \
             --exclude 'Ideas.md' \
             --exclude 'data/*.parquet' \
             --exclude '*.csv' \
             --exclude '*.xlsx' \
             ./ hf_space/
           # Commit and push changes
           cd hf_space
           git add .
-          git commit -m "Update application files"
-          git push origin main

           # Clone the HF space
           git clone https://philmaxwell:$HF_TOKEN@huggingface.co/spaces/philmaxwell/sabw-wq-data hf_space
+          # Clean up everything except .git directory
+          cd hf_space
+          find . -mindepth 1 -not -path './.git*' -delete
+          # Remove LFS configuration
+          git rm .gitattributes || true
+          git commit -am "Clean repository"
+          # Copy new files, excluding shapefiles
+          cd ..
           rsync -av \
             --exclude 'hf_space' \
             --exclude '.git' \
+            --exclude '.gitattributes' \
             --exclude '*.ipynb' \
             --exclude 'Ideas.md' \
             --exclude 'data/*.parquet' \
             --exclude '*.csv' \
             --exclude '*.xlsx' \
+            --exclude 'data/SAB/*.shp' \
+            --exclude 'data/SAB/*.dbf' \
+            --exclude 'data/SAB/*.shx' \
             ./ hf_space/
           # Commit and push changes
           cd hf_space
           git add .
+          git commit -m "Fresh start without shapefiles"
+          git push -f origin main

.gitignore CHANGED Viewed

@@ -22,3 +22,7 @@ wheels/
 /.quarto/
 .cache/

 /.quarto/
 .cache/
+cache/
+data/KOR.zip
+*.ipynb
+*.json

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: Water Quality Report
+emoji: 💧
+colorFrom: blue
+colorTo: green
+sdk: streamlit
+sdk_version: 1.40.1
+app_file: app.py
+pinned: false
+---

analysis.py ADDED Viewed

	@@ -0,0 +1,1234 @@

+import altair as alt
+import contextily as ctx
+import geopandas as gpd
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+import scipy.stats as stats
+import seaborn as sns
+from matplotlib.colors import LinearSegmentedColormap
+from matplotlib.figure import Figure
+from plotly.subplots import make_subplots
+def load_data(filename: str) -> pd.DataFrame:
+    return pd.read_csv(filename, dtype={"Station_Number": str}).assign(
+        Org_Result_Value=lambda df: pd.to_numeric(
+            df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
+        ),
+        Activity_Start_Date_Time=lambda df: pd.to_datetime(
+            df["Activity_Start_Date_Time"]
+        ),
+    )
+def plot_analyte_trends(
+    df: pd.DataFrame, analyte_names: list[str], sample_position: str, figsize=(15, 12)
+) -> Figure:
+    """
+    Create subplots of analyte trends for the given dataframe and analytes.
+    Parameters:
+    -----------
+    df : pandas DataFrame
+        The filtered dataframe containing data for a specific station and position
+    analyte_names : list[str]
+        List of analyte names to plot
+    figsize : tuple
+        Figure size in inches (width, height)
+    """
+    # Calculate number of rows needed (2 columns)
+    n_rows = (len(analyte_names) + 1) // 2
+    fig, axes = plt.subplots(n_rows, 2, figsize=figsize)
+    axes = axes.flatten()  # Flatten axes array for easier indexing
+    station_number = df["Station_Number"].iloc[0]
+    station_name = df["Name"].iloc[0]
+    if sample_position == "All":
+        sample_position_label = "Surface and Bottom"
+    else:
+        sample_position_label = sample_position
+    for idx, analyte_name in enumerate(analyte_names):
+        ax = axes[idx]
+        data = (
+            df[df["Org_Analyte_Name"] == analyte_name]
+            .assign(Year=lambda df: df["Activity_Start_Date_Time"].dt.year)
+            .dropna(subset=["Org_Result_Value"])
+        )
+        if data.empty:
+            ax.text(
+                0.5,
+                0.5,
+                f"No data available for {analyte_name}",
+                ha="center",
+                va="center",
+            )
+            continue
+        # Determine if log scale should be used
+        log_scale_analytes = [
+            "Turbidity",
+            "Fecal Coliform (MPN)",
+            "Total Nitrogen",
+            "Total Phosphorus",
+        ]
+        log_scale = analyte_name in log_scale_analytes
+        if log_scale:
+            ax.set_yscale("log")
+            ax.yaxis.set_major_formatter(plt.ScalarFormatter())  # type: ignore
+        # Create box plot
+        groups = data.groupby("Year")
+        positions = np.array(list(groups.groups.keys()))
+        group_data = [group["Org_Result_Value"] for name, group in groups]
+        ax.boxplot(
+            group_data,
+            positions=positions,
+            widths=0.6,
+            patch_artist=True,
+            boxprops=dict(facecolor="lightblue", color="blue", alpha=0.5),
+            medianprops=dict(color="blue"),
+            whiskerprops=dict(color="blue"),
+            capprops=dict(color="blue"),
+            flierprops=dict(color="blue", markeredgecolor="blue", alpha=0.5),
+        )
+        # Calculate and plot trend line
+        yearly_means = data.groupby("Year")["Org_Result_Value"].mean()
+        X = yearly_means.index.values.reshape(-1, 1)
+        y = yearly_means.values
+        # Plot means
+        ax.plot(X, y, "bo-", linewidth=1, markersize=4, label="Annual Mean")
+        # Calculate trend line
+        if len(X) > 1:  # Only calculate trend if we have more than one point
+            slope, intercept, r_value, p_value, std_err = stats.linregress(X.ravel(), y)
+            trend_line = slope * X.ravel() + intercept
+            ax.plot(X, trend_line, "r--", alpha=0.8, linewidth=1, label="Trend")
+            # Add statistics
+            stats_text = f"R²={r_value**2:.3f}\np={p_value:.3f}"  # type: ignore
+            ax.text(
+                0.02,
+                0.98,
+                stats_text,
+                transform=ax.transAxes,
+                verticalalignment="top",
+                bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
+                parse_math=False,
+            )
+        # Customize subplot
+        ax.set_title(f"{analyte_name}", pad=15)
+        ax.set_xlabel("Year")
+        analyte_unit = data["Org_Result_Unit"].iloc[0]
+        if analyte_name == "Depth, Secchi Disk Depth":
+            y_label = f"Depth ({analyte_unit})"
+        elif analyte_name == "pH":
+            y_label = None
+        elif analyte_name.startswith("Dissolved"):
+            y_label = f"DO ({analyte_unit})"
+        elif analyte_name.startswith("Fecal Coliform"):
+            y_label = f"Fecal Coliform ({analyte_unit})"
+        else:
+            y_label = f"{analyte_name} ({analyte_unit})"
+        ax.set_ylabel(y_label)
+        ax.grid(True, alpha=0.3)
+        # Add sample sizes
+        for year, group in groups:
+            ax.text(
+                year,
+                ax.get_ylim()[1],
+                f"n={len(group)}",
+                ha="center",
+                va="bottom",
+                fontsize=8,
+            )
+    # Remove any unused subplots
+    for idx in range(len(analyte_names), len(axes)):
+        fig.delaxes(axes[idx])
+    # Add overall title with more space
+    fig.suptitle(
+        f"Water Quality Trends for {station_number} - {station_name} - {sample_position_label}",
+        fontsize=14,
+        y=0.95,
+    )
+    # Adjust layout with more space
+    plt.tight_layout(rect=(0, 0, 1, 0.95))
+    return fig
+def altair_plot_sector_trends(
+    df: pd.DataFrame, analyte_names: list[str]
+) -> alt.VConcatChart:
+    """
+    Create plots of mean annual analyte trends by sector using Altair.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe
+    analyte_names : list[str]
+        List of analytes to plot
+    Returns:
+    --------
+    alt.VConcatChart
+        Vertically concatenated Altair charts for each analyte
+    """
+    # Custom color scheme matching the matplotlib version
+    color_scale = alt.Scale(
+        domain=df["Sector"].unique().tolist(),
+        range=[
+            "#1f77b4",  # blue
+            "#ff7f0e",  # orange
+            "#2ca02c",  # green
+            "#d62728",  # red
+            "#9467bd",  # purple
+            "#8c564b",  # brown
+            "#e377c2",  # pink
+            "#7f7f7f",  # gray
+        ],
+    )
+    charts = []
+    for analyte_name in analyte_names:
+        # Filter data for current analyte
+        analyte_data = df[df["Org_Analyte_Name"] == analyte_name].copy()
+        # For Salinity, exclude Fresh Water Lakes
+        if analyte_name == "Salinity":
+            analyte_data = analyte_data[analyte_data["Sector"] != "Fresh Water Lakes"]
+        # Calculate annual means and standard errors
+        processed_data = (
+            analyte_data.assign(Year=lambda df: df["Activity_Start_Date_Time"].dt.year)
+            .groupby(["Year", "Sector"])["Org_Result_Value"]
+            .agg(["mean", "sem"])
+            .reset_index()
+            .rename(columns={"mean": "Mean", "sem": "SE"})
+        )
+        # Add confidence interval bounds
+        processed_data["Upper"] = processed_data["Mean"] + processed_data["SE"]
+        processed_data["Lower"] = processed_data["Mean"] - processed_data["SE"]
+        # Get the unit for the y-axis label
+        unit = analyte_data["Org_Result_Unit"].iloc[0] if not analyte_data.empty else ""
+        # Determine if log scale should be used
+        use_log_scale = analyte_name in [
+            "Turbidity",
+            "Fecal Coliform (MPN)",
+            "Total Nitrogen",
+            "Total Phosphorus",
+        ]
+        # Create base chart
+        base = alt.Chart(processed_data).encode(
+            x=alt.X("Year:O", axis=alt.Axis(title=None)),
+            color=alt.Color("Sector:N", scale=color_scale),
+            tooltip=[
+                alt.Tooltip("Year:O"),
+                alt.Tooltip("Sector:N"),
+                alt.Tooltip("Mean:Q", format=".2f"),
+                alt.Tooltip("SE:Q", format=".2f"),
+            ],
+        )
+        # Create line and point layers
+        lines = base.mark_line().encode(
+            y=alt.Y(
+                "Mean:Q",
+                title=f"({unit})",
+                scale=alt.Scale(type="log" if use_log_scale else "linear"),
+            )
+        )
+        points = base.mark_point(size=50).encode(y=alt.Y("Mean:Q"))
+        # Create confidence interval area
+        area = base.mark_area(opacity=0.15).encode(
+            y=alt.Y("Lower:Q"), y2=alt.Y2("Upper:Q")
+        )
+        # Combine layers
+        chart = (
+            (area + lines + points)
+            .properties(
+                width=600,
+                height=300,
+                title=alt.TitleParams(text=analyte_name, anchor="middle", fontSize=14),
+            )
+            .interactive()
+        )
+        charts.append(chart)
+    # Combine all charts vertically
+    final_chart = alt.vconcat(*charts).configure(
+        view={"strokeWidth": 0}, axis={"grid": True, "gridOpacity": 0.2}
+    )
+    return final_chart
+def plotly_plot_analyte_trends(df: pd.DataFrame, analyte_names: list[str]) -> go.Figure:
+    """
+    Create subplots of analyte trends using Plotly for the given dataframe and analytes.
+    Parameters:
+    -----------
+    df : pandas DataFrame
+        The filtered dataframe containing data for a specific station and position
+    analyte_names : list[str]
+        List of analyte names to plot
+    Returns:
+    --------
+    go.Figure
+        Plotly figure containing the subplots
+    """
+    # Calculate number of rows needed (2 columns)
+    n_rows = (len(analyte_names) + 1) // 2
+    # Create subplot figure
+    fig = make_subplots(
+        rows=n_rows,
+        cols=2,
+        subplot_titles=analyte_names,
+        vertical_spacing=0.12,
+        horizontal_spacing=0.1,
+    )
+    station_number = df["Station_Number"].iloc[0]
+    sample_position = df["Sample_Position"].iloc[0]
+    for idx, analyte_name in enumerate(analyte_names):
+        row = idx // 2 + 1
+        col = idx % 2 + 1
+        data = (
+            df[df["Org_Analyte_Name"] == analyte_name]
+            .assign(Year=lambda df: df["Activity_Start_Date_Time"].dt.year)
+            .dropna(subset=["Org_Result_Value"])
+        )
+        if data.empty:
+            fig.add_annotation(
+                text=f"No data available for {analyte_name}",
+                xref=f"x{idx+1}",
+                yref=f"y{idx+1}",
+                x=0.5,
+                y=0.5,
+                showarrow=False,
+                row=row,
+                col=col,
+            )
+            continue
+        # Determine if log scale should be used
+        log_scale = analyte_name in ["Turbidity", "Fecal Coliform (MPN)"]
+        # Create box plot
+        groups = data.groupby("Year")
+        years = list(groups.groups.keys())
+        # Add box plot
+        fig.add_trace(
+            go.Box(
+                x=data["Year"],
+                y=data["Org_Result_Value"],
+                name="Box Plot",
+                boxpoints="outliers",
+                line=dict(color="blue"),
+                fillcolor="lightblue",
+                showlegend=False,
+            ),
+            row=row,
+            col=col,
+        )
+        # Calculate and plot means
+        yearly_means = data.groupby("Year")["Org_Result_Value"].mean()
+        # Add mean line
+        fig.add_trace(
+            go.Scatter(
+                x=years,
+                y=yearly_means.values,
+                mode="lines+markers",
+                name="Annual Mean",
+                line=dict(color="blue"),
+                showlegend=False,
+            ),
+            row=row,
+            col=col,
+        )
+        # Calculate and add trend line
+        if len(years) > 1:
+            X = np.array(years)
+            y = yearly_means.values
+            slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)
+            trend_line = slope * X + intercept
+            fig.add_trace(
+                go.Scatter(
+                    x=years,
+                    y=trend_line,
+                    mode="lines",
+                    name="Trend",
+                    line=dict(color="red", dash="dash"),
+                    showlegend=False,
+                ),
+                row=row,
+                col=col,
+            )
+            # Add statistics annotation
+            stats_text = f"R² = {r_value**2:.3f}<br>p = {p_value:.3f}"  # type: ignore
+            fig.add_annotation(
+                text=stats_text,
+                xref=f"x{idx+1}",
+                yref=f"y{idx+1}",
+                x=min(years),  # type: ignore
+                y=max(data["Org_Result_Value"]),
+                showarrow=False,
+                bgcolor="white",
+                bordercolor="black",
+                borderwidth=1,
+                row=row,
+                col=col,
+            )
+        # Add sample size annotations
+        for year, group in groups:
+            fig.add_annotation(
+                text=f"n={len(group)}",
+                x=year,
+                y=max(data["Org_Result_Value"]),
+                showarrow=False,
+                font=dict(size=8),
+                row=row,
+                col=col,
+            )
+        # Update axes
+        if log_scale:
+            fig.update_yaxes(type="log", row=row, col=col)
+        fig.update_xaxes(title_text="Year", row=row, col=col)
+        fig.update_yaxes(
+            title_text=f'Value ({data["Org_Result_Unit"].iloc[0]})', row=row, col=col
+        )
+    # Update layout
+    fig.update_layout(
+        title=f"Water Quality Trends<br>Station {station_number} - {sample_position}",
+        title_x=0.5,
+        showlegend=False,
+        height=300 * n_rows + 100,
+        width=1000,
+        template="plotly_white",
+    )
+    return fig
+def plot_sector_trends(
+    df: pd.DataFrame, analyte_names: list[str], base_height: float = 4
+) -> Figure:
+    """
+    Create plots of mean annual analyte trends by sector.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe
+    analyte_names : list[str]
+        List of analytes to plot
+    base_height : float
+        Height per subplot in inches (default=4)
+    """
+    # Calculate figure dimensions
+    n_rows = len(analyte_names)
+    fig_height = base_height * n_rows
+    # Create figure with dynamic height
+    fig, axes = plt.subplots(n_rows, 1, figsize=(15, fig_height))
+    if n_rows == 1:
+        axes = [axes]
+    custom_colors = [
+        "#1f77b4",  # blue
+        "#ff7f0e",  # orange
+        "#2ca02c",  # green
+        "#d62728",  # red
+        "#9467bd",  # purple
+        "#8c564b",  # brown
+        "#e377c2",  # pink
+        "#7f7f7f",  # gray
+    ]
+    for idx, analyte_name in enumerate(analyte_names):
+        ax = axes[idx]
+        # Filter data for current analyte
+        analyte_data = df[df["Org_Analyte_Name"] == analyte_name]
+        # For Salinity, exclude Fresh Water Lakes
+        if analyte_name == "Salinity":
+            analyte_data = analyte_data[analyte_data["Sector"] != "Fresh Water Lakes"]
+        # Plot each sector with custom colors
+        for sector, color in zip(df["Sector"].unique(), custom_colors):
+            sector_data = (
+                analyte_data[analyte_data["Sector"] == sector]
+                .assign(Year=lambda df: df["Activity_Start_Date_Time"].dt.year)
+                .groupby("Year")["Org_Result_Value"]
+                .agg(["mean", "sem"])
+                .reset_index()
+            )
+            if not sector_data.empty:
+                # Plot mean line with error bands
+                ax.plot(
+                    sector_data["Year"],
+                    sector_data["mean"],
+                    "-o",
+                    color=color,
+                    label=sector,
+                    markersize=4,
+                    linewidth=2,  # Slightly thicker lines
+                )
+                # Add error bands with slightly reduced opacity
+                ax.fill_between(
+                    sector_data["Year"],
+                    sector_data["mean"] - sector_data["sem"],
+                    sector_data["mean"] + sector_data["sem"],
+                    color=color,
+                    alpha=0.15,  # Reduced opacity for better visibility
+                )
+        # Set x-axis to show only whole years
+        years = analyte_data["Activity_Start_Date_Time"].dt.year.unique()
+        ax.set_xticks(years)
+        ax.set_xticklabels(years.astype(int))
+        # Customize subplot with lighter titles and no x-label
+        ax.set_title(analyte_name, pad=10, fontsize=11, fontweight="normal")
+        ax.set_xlabel("")
+        if not analyte_data.empty:
+            analyte_unit = analyte_data["Org_Result_Unit"].iloc[0]
+            ax.set_ylabel(f"({analyte_unit})", fontsize=10)
+        # Improve grid appearance
+        ax.grid(True, alpha=0.2, linestyle="--")
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        # Simplified legend appearance (removed 3D effects)
+        ax.legend(
+            bbox_to_anchor=(1.05, 1),
+            loc="upper left",
+            borderaxespad=0.0,
+            frameon=True,
+            fancybox=False,
+            shadow=False,
+            fontsize=9,
+        )
+        if analyte_name in [
+            "Turbidity",
+            "Fecal Coliform (MPN)",
+            "Total Nitrogen",
+            "Total Phosphorus",
+        ]:
+            ax.set_yscale("log")
+    # Adjust layout with more vertical space between subplots
+    plt.tight_layout(rect=(0, 0, 0.85, 1), h_pad=2.0)
+    return fig
+def plot_parameter_correlations(
+    df: pd.DataFrame,
+    analyte_names: list[str],
+    subset_by: str,
+    subset: str,
+    filter_by: str,
+    threshold: float = 0.2,
+) -> Figure:
+    """
+    Draw a lower-triangle correlation heatmap of the selected analytes.
+    Rows of df whose Org_Analyte_Name is in analyte_names are pivoted into one
+    column per analyte, indexed by Activity_Start_Date_Time. Analytes whose
+    share of non-missing values is below threshold are left out of the
+    correlation and listed in a footnote instead.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe; a "Year" column is read for the subtitle
+    analyte_names : list[str]
+        Analytes to include in the correlation matrix
+    subset_by : str
+        Used only in the title, which reads "{subset_by}: {subset}"
+    subset : str
+        Used only in the title, which reads "{subset_by}: {subset}"
+    filter_by : str
+        Shown in the subtitle; "All" is displayed as "Surface and Bottom"
+    threshold : float
+        Minimum fraction of non-missing values an analyte needs to be kept
+    Returns:
+    --------
+    Figure
+        Matplotlib figure holding the title, heatmap and footnote
+    """
+    pivot_df = df[df["Org_Analyte_Name"].isin(analyte_names)].pivot_table(
+        index="Activity_Start_Date_Time",
+        columns="Org_Analyte_Name",
+        values="Org_Result_Value",
+        observed=False,
+    )
+    # Clean up column names
+    pivot_df = pivot_df.rename(
+        columns={
+            "Depth, Secchi Disk Depth": "Secchi Depth",
+            "Dissolved Oxygen": "DO",
+            "Fecal Coliform (MPN)": "Fecal Coliform",
+            "Total Nitrogen": "TN",
+            "Total Phosphorus": "TP",
+        }
+    )
+    # Calculate data completeness for each parameter
+    completeness = pivot_df.notna().mean()
+    valid_params = completeness[completeness >= threshold].index
+    excluded_params = completeness[completeness < threshold]
+    # Filter pivot_df to only include parameters meeting the threshold
+    pivot_df = pivot_df[valid_params]
+    # Calculate correlation matrix
+    corr = pivot_df.corr()
+    # Calculate sample size
+    # NOTE: this counts every row of df, not only the rows that entered the pivot
+    n_samples = len(df)
+    # Create figure with more explicit spacing at the top
+    fig = plt.figure(figsize=(6, 7))
+    # Adjust gridspec ratios and spacing - modified to leave more room at top
+    gs = fig.add_gridspec(
+        3,
+        1,
+        height_ratios=[
+            1,  # Title space
+            4,  # Heatmap
+            1.5,  # Footnote
+        ],
+        hspace=0.4,
+    )
+    # Add title axes, heatmap axes, and footnote axes
+    title_ax = fig.add_subplot(gs[0])
+    heatmap_ax = fig.add_subplot(gs[1])
+    footnote_ax = fig.add_subplot(gs[2])
+    # Create heatmap
+    # The mask hides the upper triangle and the diagonal
+    mask = np.triu(np.ones_like(corr, dtype=bool))
+    heatmap = sns.heatmap(
+        corr,
+        mask=mask,
+        annot=True,
+        cmap="RdBu_r",
+        center=0,
+        vmin=-1,
+        vmax=1,
+        ax=heatmap_ax,
+        yticklabels=1,
+        cbar=True,
+        xticklabels=1,
+    )
+    # Rotate x-axis labels and adjust their position
+    heatmap_ax.set_xticklabels(
+        heatmap_ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor"
+    )
+    # Move bottom axis labels down
+    heatmap_ax.tick_params(axis="x", pad=10)
+    # Fix the colorbar ticks warning by setting ticks first
+    # The colorbar is taken to be the most recently added axes of the figure
+    colorbar = heatmap.figure.axes[-1]  # type: ignore
+    ticks = colorbar.get_yticks()
+    colorbar.set_yticks(ticks)
+    tick_labels = [f"{x:>8.2f}" for x in ticks]
+    colorbar.set_yticklabels(tick_labels)
+    # Rotate y-axis labels to horizontal
+    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
+    # Remove axis labels
+    heatmap_ax.set_xlabel("")
+    heatmap_ax.set_ylabel("")
+    # Configure footnote axis
+    footnote_ax.set_frame_on(False)  # Hide the frame
+    footnote_ax.set_xticks([])  # Remove x-ticks
+    footnote_ax.set_yticks([])  # Remove y-ticks
+    # Add footnote with adjusted position
+    if not excluded_params.empty:
+        footnote_text = "Excluded parameters (<{:.0%} data completeness):\n".format(
+            threshold
+        )
+        for param, completeness_val in excluded_params.items():
+            footnote_text += f"  - {param}: {completeness_val:.1%} complete\n"
+        footnote_ax.text(
+            0.01,
+            0.40,
+            footnote_text.rstrip(),
+            ha="left",
+            va="center",
+            fontsize=9,
+            fontstyle="italic",
+            transform=footnote_ax.transAxes,
+        )
+    # Do the same for title axis
+    title_ax.set_frame_on(False)
+    title_ax.set_xticks([])
+    title_ax.set_yticks([])
+    # Modify the filter_by text for display
+    display_filter = "Surface and Bottom" if filter_by == "All" else filter_by
+    # Add year information to the subtitle
+    # A single distinct year is shown as that year, anything else as "All Years"
+    year_info = df["Year"].iloc[0] if len(df["Year"].unique()) == 1 else "All Years"
+    # Add titles - using figure coordinates with adjusted positions
+    title_ax.text(
+        0.45,
+        0.8,  # Moved higher in figure coordinates
+        f"{subset_by}: {subset}",
+        ha="center",
+        va="center",
+        fontsize=12,
+        fontweight="bold",
+        transform=fig.transFigure,
+    )
+    title_ax.text(
+        0.45,
+        0.75,  # Moved higher in figure coordinates
+        f"{display_filter}, {year_info} (n={n_samples:,})",
+        ha="center",
+        va="bottom",
+        fontsize=10,
+        fontstyle="italic",
+        transform=fig.transFigure,
+    )
+    # Replace tight_layout with more explicit spacing control
+    # First, calculate the figure bounds
+    fig.canvas.draw()
+    # Get the tight_bbox
+    # NOTE(review): the returned bounding box is not used; confirm whether this call is still needed
+    renderer = fig.canvas.get_renderer()  # type: ignore
+    fig.get_tightbbox(renderer)
+    # Adjust the subplot positions manually
+    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.02, top=0.85, hspace=0.4)
+    return fig
+def plot_np_ratios(df: pd.DataFrame) -> Figure:
+    # Create dataframe with N, P, and Sector information
+    nutrients_df = (
+        df[df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])]
+        .pivot_table(
+            index=["Activity_Start_Date_Time", "Sector"],
+            columns="Org_Analyte_Name",
+            values="Org_Result_Value",
+            observed=True,
+        )
+        .reset_index()
+    )
+    # Calculate N:P ratio
+    nutrients_df["N:P Ratio"] = (
+        nutrients_df["Total Nitrogen"] / nutrients_df["Total Phosphorus"]
+    )
+    # Create figure with two subplots
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
+    # Time series plot with colors by sector
+    sns.scatterplot(
+        data=nutrients_df,
+        x="Activity_Start_Date_Time",
+        y="N:P Ratio",
+        hue="Sector",
+        ax=ax1,
+        alpha=0.6,
+    )
+    ax1.axhline(y=16, color="r", linestyle="--", label="Redfield Ratio (16:1)")
+    ax1.set_ylabel("N:P Ratio")
+    ax1.set_xlabel("Date")
+    ax1.set_title("N:P Ratio Over Time")
+    # Adjust legend position
+    ax1.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
+    # Histogram plot
+    sns.histplot(x=nutrients_df["N:P Ratio"].dropna(), ax=ax2)
+    ax2.axvline(x=16, color="r", linestyle="--", label="Redfield Ratio (16:1)")
+    ax2.set_xlabel("N:P Ratio")
+    ax2.set_title("Distribution of N:P Ratios")
+    ax2.legend()
+    # Adjust layout to accommodate legend
+    plt.tight_layout(rect=(0, 0, 0.9, 1))
+    return fig
+def altair_plot_np_ratios(df: pd.DataFrame) -> alt.VConcatChart:
+    # Create dataframe with N, P, and Sector information
+    nutrients_df = (
+        df[df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])]
+        .pivot_table(
+            index=["Activity_Start_Date_Time", "Sector"],
+            columns="Org_Analyte_Name",
+            values="Org_Result_Value",
+            observed=True,
+        )
+        .reset_index()
+    )
+    # Calculate N:P ratio
+    nutrients_df["N:P Ratio"] = (
+        nutrients_df["Total Nitrogen"] / nutrients_df["Total Phosphorus"]
+    )
+    # Time series plot with colors by sector
+    time_series = (
+        alt.Chart(nutrients_df)
+        .mark_circle(size=60)
+        .encode(
+            x=alt.X(
+                "Activity_Start_Date_Time:T",
+                axis=alt.Axis(format="%Y", tickCount="year"),
+                title="Date",
+            ),
+            y=alt.Y(r"N\:P Ratio:Q", title="N:P Ratio"),
+            color="Sector:N",
+            tooltip=[
+                alt.Tooltip("Activity_Start_Date_Time:T", title="Date"),
+                alt.Tooltip(r"N\:P Ratio:Q", format=".0f", title="N:P Ratio"),
+                alt.Tooltip("Sector:N", title="Sector"),
+            ],
+        )
+        .properties(title="N:P Ratio Over Time", width=600, height=300)
+        .interactive()
+    )
+    # Add Redfield Ratio line
+    redfield_line = (
+        alt.Chart(pd.DataFrame({"y": [16]})).mark_rule(color="red").encode(y="y:Q")
+    )
+    # Histogram plot
+    histogram = (
+        alt.Chart(nutrients_df)
+        .mark_bar()
+        .encode(
+            x=alt.X(r"N\:P Ratio:Q", bin=alt.Bin(maxbins=30), title="N:P Ratio"),
+            y="count()",
+            tooltip=["count()"],
+        )
+        .properties(title="Distribution of N:P Ratios", width=600, height=300)
+        .interactive()
+    )
+    # Add Redfield Ratio line to histogram
+    redfield_hist_line = (
+        alt.Chart(pd.DataFrame({"x": [16]})).mark_rule(color="red").encode(x="x:Q")
+    )
+    # Combine plots
+    combined_chart = alt.vconcat(
+        time_series + redfield_line, histogram + redfield_hist_line
+    ).resolve_scale(y="independent")
+    return combined_chart
+def plot_calendar_heatmap(
+    df: pd.DataFrame, analyte: str, colormap: str | None = None
+) -> Figure:
+    data = df[df["Org_Analyte_Name"] == analyte].copy()
+    data["Year"] = data["Activity_Start_Date_Time"].dt.year
+    data["Month"] = data["Activity_Start_Date_Time"].dt.month
+    pivot_data = data.pivot_table(
+        values="Org_Result_Value", index="Year", columns="Month", aggfunc="mean"
+    )
+    # Choose appropriate colormap based on analyte type
+    if analyte in ["Fecal Coliform (MPN)"]:
+        cmap = "viridis"  # Blue-green-yellow
+    elif analyte in ["Temperature, Water"]:
+        cmap = "coolwarm"
+    elif analyte in ["Dissolved Oxygen"]:
+        cmap = "RdYlBu"
+    elif analyte in ["Total Nitrogen", "Total Phosphorus"]:
+        cmap = "GnBu"  # Green-Blue
+    elif analyte in ["Depth, Secchi Disk Depth"]:
+        cmap = "Blues_r"
+    else:
+        cmap = "Blues"  # Default blue gradient
+    # If colormap is set, override the analyte-specific default
+    if colormap:
+        cmap = colormap
+    fig, ax = plt.subplots(figsize=(6, len(pivot_data) * 0.5))
+    # Create heatmap
+    sns.heatmap(
+        pivot_data,
+        cmap=cmap,
+        annot=True,
+        fmt=".2f",
+        cbar_kws={"label": data["Org_Result_Unit"].iloc[0]},
+        annot_kws={"size": 6},
+    )
+    ax.set_title(f"Monthly Averages Heatmap: {analyte}", fontsize=10, pad=5)
+    ax.tick_params(axis="both", which="major", labelsize=7)
+    # Get the colorbar and adjust its label size
+    colorbar = ax.collections[0].colorbar
+    colorbar.ax.tick_params(labelsize=7)  # type: ignore
+    colorbar.set_label(data["Org_Result_Unit"].iloc[0], size=7)  # type: ignore
+    return fig
+def plot_seasonal_salinity(
+    salinity_data: pd.DataFrame,
+    year: str,
+    basemap_provider,
+    alpha=0.5,
+    shapefile_path="data/SAB/SAB.shp",
+):
+    """
+    Create seasonal plots of mean salinity values by WBID with basemap.
+    Args:
+        salinity_data: DataFrame containing salinity measurements
+        year: Year to filter data for (str)
+    """
+    # Read and filter WBIDs
+    wbids = gpd.read_file(shapefile_path)
+    relevant_wbids = salinity_data["WBID"].unique()
+    wbids = wbids[wbids["WBID"].isin(relevant_wbids)]
+    wbids = wbids.to_crs(epsg=3857)
+    # Process data - create a copy to avoid SettingWithCopyWarning
+    year_data = salinity_data[
+        salinity_data["Activity_Start_Date_Time"].dt.year == int(year)
+    ].copy()
+    # Add season column using loc
+    year_data.loc[:, "season"] = pd.cut(
+        year_data["Activity_Start_Date_Time"].dt.month,
+        bins=[0, 3, 6, 9, 12],
+        labels=["Winter", "Spring", "Summer", "Fall"],
+    )
+    # Calculate seasonal means with observed=True
+    seasonal_means = (
+        year_data.groupby(["WBID", "season"], observed=True)["Salinity"]
+        .mean()
+        .reset_index()
+    )
+    fig = plt.figure(figsize=(20, 14))
+    # Create custom colormap with focused range
+    colors = ["#08519c", "#73a9cf", "#fee090", "#fc8d59", "#d73027"]
+    cmap = LinearSegmentedColormap.from_list("custom", colors, N=100)
+    # Get global min/max for consistent colormap
+    vmin = seasonal_means["Salinity"].min()
+    vmax = 40
+    # Calculate map extent
+    bounds = wbids.total_bounds
+    x_buffer = (bounds[2] - bounds[0]) * 0.05
+    y_buffer = (bounds[3] - bounds[1]) * 0.05
+    extent = [
+        bounds[0] - x_buffer,
+        bounds[2] + x_buffer,
+        bounds[1] - y_buffer,
+        bounds[3] + y_buffer,
+    ]
+    # Create subplots with tighter spacing
+    gs = fig.add_gridspec(
+        2,
+        2,
+        width_ratios=[1, 1],
+        wspace=0.05,  # Minimal horizontal space between plots
+        hspace=-0.15,  # More negative value to further reduce vertical space
+        left=0.02,  # Left margin
+        right=0.98,  # Right margin
+        top=0.95,  # Slightly reduced top margin to give more space
+        bottom=0.05,  # Slightly increased bottom margin to give more space
+    )
+    for idx, season in enumerate(["Winter", "Spring", "Summer", "Fall"]):
+        ax = fig.add_subplot(gs[idx // 2, idx % 2])
+        season_data = seasonal_means[seasonal_means["season"] == season]
+        merged = wbids.merge(season_data, on="WBID", how="left")
+        # Plot WBIDs
+        merged.plot(
+            column="Salinity",
+            ax=ax,
+            cmap=cmap,
+            vmin=vmin,
+            vmax=vmax,
+            alpha=0.7,
+            missing_kwds={"color": "lightgrey", "alpha": 0.5},
+        )
+        ctx.add_basemap(ax, source=basemap_provider, zoom=11, alpha=alpha)  # type: ignore
+        ax.set_xlim(extent[0], extent[1])
+        ax.set_ylim(extent[2], extent[3])
+        # Adjust title position
+        if idx < 2:  # Top row
+            ax.set_title(
+                f"{season} {year} Mean Salinity", pad=15
+            )  # More padding for top row
+        else:  # Bottom row
+            ax.set_title(
+                f"{season} {year} Mean Salinity", pad=5
+            )  # Less padding for bottom row
+        ax.set_axis_off()
+    # Add colorbar
+    norm = plt.Normalize(vmin=vmin, vmax=vmax)  # type: ignore
+    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+    sm.set_array([])
+    fig.colorbar(
+        sm,
+        ax=fig.axes,
+        orientation="vertical",
+        label="Salinity (ppt)",
+        pad=0.01,
+        fraction=0.015,
+        ticks=np.arange(0, 45, 5),  # Add ticks every 5 units
+    )
+    return fig
+def plot_seasonal_salinity_for_bays(
+    salinity_data: pd.DataFrame,
+    year: str,
+    basemap_provider=ctx.providers.USGS.USTopo,  # type: ignore
+    alpha=0.5,
+    shapefile_path="data/SAB/SAB.shp",
+):
+    """
+    Create seasonal plots of mean salinity values by WBID for N, E, W, SAB, GL and Lake Powell.
+    """
+    fig = plot_seasonal_salinity(
+        salinity_data.query(
+            "WBID.isin(['1061A', '1061B', '1061C', '1061D', '1061E', '1061F', '1061G', '1061H', '1055A'])"
+        ),
+        year=year,
+        basemap_provider=basemap_provider,
+        alpha=alpha,
+        shapefile_path=shapefile_path,
+    )
+    return fig
+def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
+    """
+    Create a scatter plot of DO vs temperature with regression line using seaborn.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe containing DO and temperature measurements
+    Returns:
+    --------
+    Figure
+        Matplotlib figure containing the plot
+    """
+    do_temp_data = (
+        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
+        .pivot_table(
+            index=["Activity_Start_Date_Time", "Station_Number", "Sample_Position"],
+            columns="Org_Analyte_Name",
+            values="Org_Result_Value",
+            observed=False,
+        )
+        .reset_index()
+        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
+    )
+    sns.set_palette("muted")
+    # Create plot with regression line
+    g = sns.lmplot(
+        data=do_temp_data,
+        x="Temperature, Water",
+        y="Dissolved Oxygen",
+        hue="Sample_Position",
+        hue_order=["Surface", "Bottom"],
+        scatter_kws={"alpha": 0.6},
+        height=8,
+        aspect=1.5,
+        legend=False,
+    )
+    # Add DO threshold and customize plot
+    ax = g.axes[0, 0]
+    ax.axhline(y=5, color="red", linestyle=":", alpha=0.5)
+    ax.text(
+        ax.get_xlim()[0],
+        5.1,
+        " 5 mg/L DO threshold",
+        ha="left",
+        va="bottom",
+        color="red",
+        alpha=0.5,
+    )
+    g.set_axis_labels("Water Temperature (°C)", "Dissolved Oxygen (mg/L)")
+    ax.set_title("Dissolved Oxygen vs Water Temperature", pad=20, fontsize=16)
+    ax.legend(title="Sample Position", bbox_to_anchor=(1.05, 1), loc="upper left")
+    # Add grid
+    ax.grid(True, alpha=0.3)
+    return g.figure
+def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
+    """
+    Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
+    Matches the style and features of the original matplotlib/seaborn plot.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe containing DO and temperature measurements
+    Returns:
+    --------
+    alt.Chart
+        Altair chart object
+    """
+    # Prepare the data similarly to the original function
+    do_temp_data = (
+        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
+        .pivot_table(
+            index=[
+                "Activity_Start_Date_Time",
+                "Station_Number",
+                "Sample_Position",
+                "Sector",
+            ],
+            columns="Org_Analyte_Name",
+            values="Org_Result_Value",
+            observed=False,
+        )
+        .reset_index()
+        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
+    )
+    # Create the base scatter plot
+    scatter = (
+        alt.Chart(do_temp_data)
+        .mark_circle(size=60, opacity=0.6)
+        .encode(
+            x=alt.X(
+                "Temperature, Water:Q",
+                title="Water Temperature (°C)",
+                scale=alt.Scale(zero=False),
+            ),
+            y=alt.Y(
+                "Dissolved Oxygen:Q",
+                title="Dissolved Oxygen (mg/L)",
+                scale=alt.Scale(zero=False),
+            ),
+            color=alt.Color(
+                "Sample_Position:N",
+                scale=alt.Scale(
+                    domain=["Surface", "Bottom"],
+                    range=["#8da0cb", "#fc8d62"],  # Muted blue and orange
+                ),
+                legend=alt.Legend(title="Sample Position"),
+            ),
+            tooltip=[
+                alt.Tooltip("Temperature, Water:Q", title="Temperature", format=".1f"),
+                alt.Tooltip("Dissolved Oxygen:Q", title="DO", format=".1f"),
+                alt.Tooltip("Sample_Position:N", title="Position"),
+                alt.Tooltip("Sector:N", title="Sector"),
+                alt.Tooltip("Station_Number:N", title="Station"),
+            ],
+        )
+    )
+    # Add regression lines for each Sample_Position
+    regression = (
+        scatter.transform_regression(
+            "Temperature, Water", "Dissolved Oxygen", groupby=["Sample_Position"]
+        )
+        .mark_line(size=2)
+        .encode(
+            color=alt.Color(
+                "Sample_Position:N",
+                scale=alt.Scale(
+                    domain=["Surface", "Bottom"], range=["#8da0cb", "#fc8d62"]
+                ),
+            )
+        )
+    )
+    # Create DO threshold line
+    threshold_df = pd.DataFrame({"y": [5]})
+    threshold_line = (
+        alt.Chart(threshold_df)
+        .mark_rule(strokeDash=[4, 4], color="red", opacity=0.5)
+        .encode(y="y:Q")
+    )
+    # Add threshold label
+    threshold_label = (
+        alt.Chart(
+            pd.DataFrame({"x": [do_temp_data["Temperature, Water"].min()], "y": [5.1]})
+        )
+        .mark_text(
+            align="left",
+            baseline="bottom",
+            color="red",
+            opacity=0.5,
+            text=" 5 mg/L DO threshold",
+        )
+        .encode(x="x:Q", y="y:Q")
+    )
+    # Combine all layers and configure
+    final_chart = (
+        alt.layer(scatter, regression, threshold_line, threshold_label)
+        .properties(
+            width=800,
+            height=750,
+        )
+        .configure_axis(grid=True, gridOpacity=0.3)
+        .interactive()
+    )
+    return final_chart

app.py ADDED Viewed

	@@ -0,0 +1,1070 @@

+import io
+import json
+import os
+import textwrap
+import time
+import uuid
+from datetime import datetime
+from functools import wraps
+from pathlib import Path
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from great_tables import GT, html
+from matplotlib import pyplot as plt
+from analysis import (
+    altair_plot_do_temp_relationship,
+    altair_plot_np_ratios,
+    altair_plot_sector_trends,
+    plot_analyte_trends,
+    plot_calendar_heatmap,
+    plot_do_temp_relationship,
+    plot_np_ratios,
+    plot_parameter_correlations,
+    plot_seasonal_salinity_for_bays,
+    plot_sector_trends,
+)
+from main import (
+    create_multiindex_columns,
+    create_overall_summary,
+    create_summary_by_station_and_position,
+    get_analyte_data_with_lat_long,
+    get_raw_data,
+    get_stations_data,
+)
+def log_visit():
+    """Log visitor analytics including timestamp, user agent, and page info"""
+    if st.session_state.get("admin_authenticated", False):
+        return
+    log_file = Path("analytics.json")
+    now = datetime.now()
+    today = now.strftime("%Y-%m-%d")
+    if "visitor_id" not in st.session_state:
+        st.session_state.visitor_id = str(uuid.uuid4())
+    try:
+        user_agent = st.context.headers.get("User-Agent", "Unknown")
+    except Exception:
+        user_agent = "Unknown"
+    visit_type = (
+        "initial" if not st.session_state.get("logged_visit") else "section_change"
+    )
+    visit_data = {
+        "timestamp": now.isoformat(),
+        "date": today,
+        "user_agent": user_agent,
+        "visitor_id": st.session_state.visitor_id,
+        "page_section": st.session_state.get("current_section", "Overall Summary"),
+        "visit_type": visit_type,
+        "query_params": dict(st.query_params),
+    }
+    if log_file.exists():
+        with open(log_file, "r") as f:
+            data = json.load(f)
+            if "visits" not in data:
+                data["visits"] = []
+            if "daily_counts" not in data:
+                data["daily_counts"] = {}
+            if "section_counts" not in data:
+                data["section_counts"] = {}
+            if "daily_visitors" not in data:
+                data["daily_visitors"] = {}
+    else:
+        data = {
+            "visits": [],
+            "daily_counts": {},
+            "section_counts": {},
+            "daily_visitors": {},
+        }
+    if today not in data["daily_visitors"]:
+        data["daily_visitors"][today] = []
+    if st.session_state.visitor_id not in data["daily_visitors"][today]:
+        data["daily_visitors"][today].append(st.session_state.visitor_id)
+        data["daily_counts"][today] = len(data["daily_visitors"][today])
+    data["visits"].append(visit_data)
+    current_section = visit_data["page_section"]
+    data["section_counts"][current_section] = (
+        data["section_counts"].get(current_section, 0) + 1
+    )
+    with open(log_file, "w") as f:
+        json.dump(data, f, indent=2)
+# Record the first script run of a session once; the "logged_visit" flag in
+# session state prevents repeats on later reruns. log_visit() itself returns
+# early for authenticated admins.
+if not st.session_state.get("logged_visit"):
+    log_visit()
+    st.session_state["logged_visit"] = True
+# Admin password is read from the environment; empty string when unset.
+ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "")
+# Defaults for the feature flags; both are reassigned from sidebar toggles
+# further down in this script.
+ENABLE_TIMING = False
+ENABLE_ALTAIR = False
+def check_admin_access():
+    """Handle admin authentication logic only"""
+    if not ADMIN_PASSWORD:
+        return False
+    if "admin_authenticated" not in st.session_state:
+        st.session_state.admin_authenticated = False
+    return st.session_state.admin_authenticated
+def render_admin_panel():
+    """Handle admin UI elements only"""
+    with st.sidebar:
+        st.markdown("---")
+        with st.expander("🔒 Admin", expanded=False):
+            if st.session_state.admin_authenticated:
+                if st.button("Logout"):
+                    st.session_state.admin_authenticated = False
+                    st.rerun()
+            else:
+                password_input = st.text_input("Password", type="password")
+                if st.button("Login"):
+                    if password_input == ADMIN_PASSWORD:
+                        st.session_state.admin_authenticated = True
+                        st.rerun()
+                    else:
+                        st.error("Incorrect password")
+st.set_page_config(
+    page_title="Water Quality Summary",
+    page_icon="💧",
+    layout="wide",
+    initial_sidebar_state="expanded",
+    menu_items={"Get Help": None, "Report a bug": None, "About": None},
+)
+st.sidebar.title("Navigation")
+sections = [
+    "Overall Summary",
+    "Summary by Station",
+    "Nutrient Ratios",
+    "Sector Trends",
+    "Trends by Station",
+    "Parameter Correlations",
+    "DO/Temp Relationship",
+    "Calendar Heatmaps",
+    "Seasonal Trends",
+    "Raw Data",
+]
+is_admin = check_admin_access()
+if is_admin:
+    sections.append("Analytics")
+    if is_admin:
+        ENABLE_TIMING = st.sidebar.toggle("Enable Timing", value=ENABLE_TIMING)
+section = st.sidebar.radio(
+    "Go to",
+    sections,
+)
+if not st.session_state.get("admin_authenticated", False) and (
+    "current_section" not in st.session_state
+    or st.session_state.current_section != section
+):
+    st.session_state.current_section = section
+    log_visit()
+if not st.session_state.get("admin_authenticated", False) and not st.session_state.get(
+    "logged_visit"
+):
+    log_visit()
+    st.session_state["logged_visit"] = True
+if section == "Overall Summary":
+    render_admin_panel()
+def summarize_parameter_value(value: str, max_length: int = 100) -> str:
+    """Summarize parameter values that are too long or complex."""
+    if not value:
+        return ""
+    # Handle DataFrames
+    if "DataFrame" in value and "[" in value and "]" in value:
+        try:
+            # Extract dimensions if present in string like "DataFrame[1000x20]"
+            dims = value[value.find("[") + 1 : value.find("]")]
+            return f"DataFrame[{dims}]"
+        except Exception:
+            return "DataFrame"
+    # Handle lists, tuples, and other sequences
+    if value.startswith(("[", "(", "{")):
+        try:
+            # Count items if it's a sequence
+            item_count = value.count(",") + 1
+            return f"{value[:20]}... ({item_count} items)"
+        except Exception:
+            return f"{value[:20]}..."
+    # Handle long strings
+    if len(value) > max_length:
+        return f"{value[:max_length]}..."
+    return value
+def timer(include_params=False):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if not ENABLE_TIMING:
+                return func(*args, **kwargs)
+            start = time.perf_counter()
+            result = func(*args, **kwargs)
+            end = time.perf_counter()
+            duration = end - start
+            # Initialize timing_stats if it doesn't exist
+            if "timing_stats" not in st.session_state:
+                st.session_state.timing_stats = {}
+                st.session_state.timing_logs = []
+            # Initialize list for this function if it doesn't exist
+            if func.__name__ not in st.session_state.timing_stats:
+                st.session_state.timing_stats[func.__name__] = []
+            # Append new duration to the list
+            st.session_state.timing_stats[func.__name__].append(duration)
+            # Create log entry with optional parameter info
+            log_entry = {
+                "timestamp": datetime.now().isoformat(),
+                "function": func.__name__,
+                "duration": duration,
+            }
+            if include_params:
+                # Get parameter names from function signature
+                import inspect
+                sig = inspect.signature(func)
+                param_names = list(sig.parameters.keys())
+                # Combine args and kwargs into a parameter dictionary
+                param_values = {}
+                for i, arg in enumerate(args):
+                    if i < len(param_names):
+                        param_values[param_names[i]] = summarize_parameter_value(
+                            str(arg),
+                            max_length=40,
+                        )
+                param_values.update(
+                    {
+                        k: summarize_parameter_value(str(v), max_length=40)
+                        for k, v in kwargs.items()
+                    }
+                )
+                log_entry["parameters"] = param_values
+            st.session_state.timing_logs.append(log_entry)
+            return result
+        return wrapper
+    return decorator
+@timer(include_params=False)
+def load_raw_data():
+    """Load the raw measurements from the bundled master parquet file via get_raw_data."""
+    return get_raw_data("data/master_data_file_2019-01-01_-_2024-10-31.parquet")
+@timer(include_params=False)
+def create_summaries(raw_df):
+    """Build the summary views derived from the raw data.
+    Returns a tuple (summary_by_station, overall_summary, multiindex_df);
+    multiindex_df is produced from summary_by_station.
+    """
+    summary_by_station = create_summary_by_station_and_position(raw_df)
+    overall_summary = create_overall_summary(raw_df)
+    multiindex_df = create_multiindex_columns(summary_by_station)
+    return summary_by_station, overall_summary, multiindex_df
+@timer(include_params=False)
+def prepare_downloads(summary_by_station, multiindex_df, raw_df):
+    summary_csv = summary_by_station.reset_index().to_csv(index=False)
+    excel_buffer = io.BytesIO()
+    with pd.ExcelWriter(excel_buffer, engine="xlsxwriter") as writer:
+        multiindex_df.to_excel(writer, sheet_name="Water Quality Summary")
+    summary_excel = excel_buffer.getvalue()
+    raw_csv = raw_df.to_csv(index=False)
+    return {
+        "summary": {
+            "CSV": (summary_csv, "csv", "text/csv"),
+            "Excel": (summary_excel, "xlsx", "application/vnd.ms-excel"),
+        },
+        "raw": {
+            "CSV": (raw_csv, "csv", "text/csv"),
+        },
+    }
+@timer(include_params=False)
+def load_seasonal_data(raw_df, analyte):
+    """Load and prepare data for seasonal trends analysis.
+    Thin timed wrapper around get_analyte_data_with_lat_long(raw_df, analyte).
+    """
+    return get_analyte_data_with_lat_long(raw_df, analyte)
+@timer(include_params=True)
+def generate_seasonal_plot(data, year, shapefile_path):
+    """Generate the seasonal trends plot.
+    Thin timed wrapper around plot_seasonal_salinity_for_bays; only
+    shapefile_path is forwarded by keyword, so the basemap provider and alpha
+    keep that function's defaults.
+    """
+    return plot_seasonal_salinity_for_bays(data, year, shapefile_path=shapefile_path)
+# if ENABLE_TIMING:
+#     def load_data():
+#         """
+#         Load all data views needed by the application.
+#         Returns:
+#             dict: Contains different views of the data
+#         """
+#         raw_df = load_raw_data()
+#         summary_by_station, overall_summary, multiindex_df = create_summaries(raw_df)
+#         downloads = prepare_downloads(summary_by_station, multiindex_df, raw_df)
+#         return {
+#             "raw_df": raw_df,
+#             "summary_by_station": summary_by_station,
+#             "overall_summary": overall_summary,
+#             "multiindex_df": multiindex_df,
+#             "downloads": downloads,
+#         }
+# else:
+@st.cache_data
+def load_data():
+    """
+    Load and cache all data views needed by the application.
+    Returns:
+        dict: Contains different views of the data
+    """
+    raw_df = load_raw_data()
+    summary_by_station, overall_summary, multiindex_df = create_summaries(raw_df)
+    downloads = prepare_downloads(summary_by_station, multiindex_df, raw_df)
+    return {
+        "raw_df": raw_df,
+        "summary_by_station": summary_by_station,
+        "overall_summary": overall_summary,
+        "multiindex_df": multiindex_df,
+        "downloads": downloads,
+    }
+@st.cache_data
+def generate_correlation_plot(
+    subset_df, analyte_names, subset_by, subset, position_filter
+):
+    fig = plot_parameter_correlations(
+        subset_df, analyte_names, subset_by, subset, position_filter
+    )
+    return fig
+def create_overall_summary_table(df: pd.DataFrame) -> GT:
+    df.index.name = "Statistic"
+    df = df.reset_index()
+    return (
+        GT(df, rowname_col="Statistic")
+        .tab_header(
+            title="Overall Water Quality",
+            subtitle="Summary statistics for all data analyzed during study period",
+        )
+        .fmt_number(
+            columns=[
+                "Secchi Depth (feet)",
+                "Temperature (°C)",
+                "Dissolved Oxygen (mg/L)",
+            ],
+            decimals=1,
+        )
+        .fmt_integer(
+            columns=list(df.columns[1:]),
+            rows=lambda x: x["Statistic"] == "Count",  # type: ignore
+            use_seps=True,
+        )
+        .cols_label(
+            **{
+                col: html(f"{col.rpartition(' ')[0]}<br>{col.rpartition(' ')[-1]}")
+                if col != "pH"
+                else html(f"{col}<br>&nbsp;")
+                for col in df.columns[1:]
+            }  # type: ignore
+        )
+        .cols_width(cases={col: "14%" for col in df.columns[1:]})
+        .opt_align_table_header(align="center")
+    )
+# All data views for this run; load_data is wrapped in st.cache_data.
+data = load_data()
+if section == "Overall Summary":
+    st.title("Overall Summary")
+    # The great_tables object is rendered to raw HTML and embedded as-is
+    st.html(create_overall_summary_table(data["overall_summary"]).as_raw_html())
+    st.markdown("### Sampling Stations Map")
+    stations_df = get_stations_data()
+    # Latitude/Longitude position the points but are hidden from the hover box
+    fig = px.scatter_mapbox(
+        stations_df,
+        lat="Latitude",
+        lon="Longitude",
+        hover_data={
+            "Number": True,
+            "U_of_F": True,
+            "Sector": True,
+            "WBID": True,
+            "Latitude": False,
+            "Longitude": False,
+        },
+        hover_name="Name",
+        zoom=10,
+        height=700,
+        labels={
+            "Number": "Station Number",
+            "U_of_F": "ID",
+            "Sector": "Sector",
+            "WBID": "WBID",
+        },
+    )
+    fig.update_layout(
+        mapbox_style="carto-positron",
+        margin={"r": 0, "t": 0, "l": 0, "b": 0},
+    )
+    st.plotly_chart(fig, use_container_width=True)
+elif section == "Summary by Station":
+    st.title("Summary by Station")
+    download_format = st.radio(
+        "Select download format:",
+        ["CSV", "Excel"],
+        key="summary_download",
+        horizontal=True,
+    )
+    # Each entry is a (payload, file extension, MIME type) tuple built by
+    # prepare_downloads
+    download_data = data["downloads"]["summary"][download_format]
+    st.download_button(
+        label=f"Download Summary Data ({download_format})",
+        data=download_data[0],
+        file_name=f"water_quality_summary.{download_data[1]}",
+        mime=download_data[2],
+    )
+    st.markdown("""
+    This table shows summary statistics for various water quality measurements across different stations.
+    Each station's measurements are broken down into surface and bottom readings where applicable.
+    """)
+    # Two-decimal display; missing values are shaded light gray
+    st.dataframe(
+        data["multiindex_df"]
+        .style.format(precision=2)
+        .highlight_null(props="background-color: lightgray"),
+        use_container_width=True,
+        height=600,
+    )
+    st.markdown("---")
+    # Station count = distinct values in the first index level of the summary
+    total_stations = len(data["summary_by_station"].index.get_level_values(0).unique())
+    st.markdown(f"Total number of stations: **{total_stations}**")
+elif section == "Trends by Station":
+    st.title("Trends by Station")
+    analyte_names = [
+        "Dissolved Oxygen",
+        "Salinity",
+        "pH",
+        "Depth, Secchi Disk Depth",
+        "Turbidity",
+        "Fecal Coliform (MPN)",
+        "Total Nitrogen",
+        "Total Phosphorus",
+    ]
+    st.sidebar.markdown("### Filter Options")
+    selected_station = st.sidebar.selectbox(
+        "Station:",
+        sorted(data["raw_df"]["Station_Number"].unique()),
+        index=sorted(data["raw_df"]["Station_Number"].unique()).index("3.20"),
+    )
+    selected_position = st.sidebar.segmented_control(
+        "Sample Position:",
+        ("All", "Surface", "Bottom"),
+        default="All",
+        selection_mode="single",
+    )
+    selected_position = selected_position or "All"
+    filtered_df = data["raw_df"].query("Station_Number == @selected_station")
+    if selected_position != "All":
+        filtered_df = filtered_df.query("Sample_Position == @selected_position")
+    csv_buffer = io.StringIO()
+    filtered_df.to_csv(csv_buffer, index=False)
+    st.sidebar.download_button(
+        label="Download Filtered Data (CSV)",
+        data=csv_buffer.getvalue(),
+        file_name=f"station_{selected_station}_{selected_position.lower()}_data.csv",
+        mime="text/csv",
+    )
+    with st.sidebar.expander("Preview Filtered Data"):
+        st.markdown(f"**{len(filtered_df):,}** records")
+        display_columns = [
+            "Activity_Start_Date_Time",
+            "Sample_Position",
+            "Org_Analyte_Name",
+            "Org_Result_Value",
+            "Org_Result_Unit",
+        ]
+        preview_df = filtered_df[["Station_Number"] + display_columns].copy()
+        preview_df.set_index("Station_Number", inplace=True)
+        st.dataframe(
+            preview_df.style.format(precision=2),
+            use_container_width=True,
+            height=300,
+        )
+    if not filtered_df.empty:
+        fig = plot_analyte_trends(filtered_df, analyte_names, selected_position)
+        st.pyplot(fig)
+    else:
+        st.warning(
+            "No data available for the selected station and position combination."
+        )
+elif section == "Sector Trends":
+    st.title("Sector Trends")
+    # Toggle between the Altair (interactive) and matplotlib renderings
+    ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plots", value=ENABLE_ALTAIR)
+    default_analytes = [
+        "Dissolved Oxygen",
+        "Salinity",
+        "Depth, Secchi Disk Depth",
+        "Total Nitrogen",
+        "Total Phosphorus",
+    ]
+    # Defaults first (in the order above), then every other analyte found in
+    # the data, alphabetically
+    all_analytes = default_analytes + [
+        x
+        for x in sorted(data["raw_df"]["Org_Analyte_Name"].unique())
+        if x not in default_analytes
+    ]
+    selected_analytes = st.sidebar.multiselect(
+        "Select Analytes:",
+        options=all_analytes,
+        default=default_analytes,
+        key="sector_analyte_select",
+        help="Choose one or more analytes to plot.",
+    )
+    if selected_analytes and not data["raw_df"].empty:
+        if ENABLE_ALTAIR:
+            charts = altair_plot_sector_trends(data["raw_df"], selected_analytes)
+            st.altair_chart(charts, use_container_width=True)  # type: ignore
+        else:
+            fig = plot_sector_trends(data["raw_df"], selected_analytes, base_height=3.5)
+            st.pyplot(fig)
+    elif not selected_analytes:
+        st.warning("No analytes selected.")
+    else:
+        # Reached only when analytes are selected but the raw data is empty
+        st.warning("No data available for the selected analytes.")
+elif section == "Parameter Correlations":
+    st.title("Parameter Correlations")
+    subset_by = "Sector"
+    st.sidebar.markdown("### Filter Options")
+    position_filter = st.sidebar.selectbox(
+        "Sample Position:", ["All", "Surface", "Bottom"], index=0
+    )
+    with st.spinner("Loading data for correlation plots..."):
+        analyte_names = [
+            "Dissolved Oxygen",
+            "Salinity",
+            "pH",
+            "Depth, Secchi Disk Depth",
+            "Turbidity",
+            "Fecal Coliform (MPN)",
+            "Total Nitrogen",
+            "Total Phosphorus",
+        ]
+        raw_df = data["raw_df"]
+        raw_df["Year"] = raw_df["Activity_Start_Date_Time"].dt.year
+        years = ["All"] + sorted(raw_df["Year"].unique().tolist(), reverse=True)
+        year_filter = st.sidebar.selectbox("Year:", years, index=0)
+    plot_df = raw_df.copy()
+    if position_filter != "All":
+        plot_df = plot_df[plot_df["Sample_Position"] == position_filter]
+    if year_filter != "All":
+        plot_df = plot_df[plot_df["Year"] == year_filter]
+    plot_df_download = plot_df.copy()
+    csv_buffer = io.StringIO()
+    plot_df_download.to_csv(csv_buffer, index=False)
+    st.sidebar.download_button(
+        label="Download Filtered Data (CSV)",
+        data=csv_buffer.getvalue(),
+        file_name=f"correlation_data_{subset_by}_{position_filter}_{year_filter}.csv",
+        mime="text/csv",
+    )
+    st.sidebar.markdown("### Group By")
+    subset_by = st.sidebar.selectbox(
+        "Group correlations by:", ["Sector", "Waterbody_Class"], index=0
+    )
+    unique_subsets = sorted(plot_df[subset_by].unique())
+    selected_groups = st.sidebar.multiselect(
+        "Select groups to display:",
+        options=unique_subsets,
+        default=unique_subsets,
+        key="group_selector",
+    )
+    # Add ordering control
+    order_by = st.sidebar.radio(
+        "Order groups by:", ["Number of Records", "Alphabetical"], key="group_order"
+    )
+    ### FIX THIS
+    # Add download button for grouped correlation data
+    if selected_groups:
+        grouped_data = []
+        for group in selected_groups:
+            subset_df = plot_df[plot_df[subset_by] == group]
+            if not subset_df.empty:
+                # Filter for just the analytes we want to correlate
+                analyte_df = subset_df[
+                    subset_df["Org_Analyte_Name"].isin(analyte_names)
+                ].copy()
+                analyte_df["Group"] = group
+                grouped_data.append(analyte_df)
+        if grouped_data:
+            combined_data = pd.concat(grouped_data)
+            csv_buffer = io.StringIO()
+            combined_data.to_csv(csv_buffer, index=False)
+            st.sidebar.download_button(
+                label="Download Grouped Correlation Data (CSV)",
+                data=csv_buffer.getvalue(),
+                file_name=f"grouped_correlation_data_{subset_by}_{position_filter}_{year_filter}.csv",
+                mime="text/csv",
+            )
+    # Order the selected groups
+    if order_by == "Number of Records":
+        group_counts = {
+            group: len(plot_df[plot_df[subset_by] == group])
+            for group in selected_groups
+        }
+        selected_groups = sorted(
+            selected_groups, key=lambda x: group_counts[x], reverse=True
+        )
+    else:
+        selected_groups = sorted(selected_groups)
+    # Loop with filtered groups
+    cols = st.columns(2)
+    for idx, subset in enumerate(selected_groups):
+        subset_df = plot_df[plot_df[subset_by] == subset]
+        if not subset_df.empty:
+            fig = generate_correlation_plot(
+                subset_df, analyte_names, subset_by, subset, position_filter
+            )
+            cols[idx % 2].pyplot(fig)
+            plt.close()
+            with cols[idx % 2].expander(f"View {subset} Data"):
+                st.markdown(f"**{len(subset_df):,}** records")
+                display_columns = [
+                    "Activity_Start_Date_Time",
+                    "Station_Number",
+                    "Sample_Position",
+                    "Org_Analyte_Name",
+                    "Org_Result_Value",
+                    "Org_Result_Unit",
+                ]
+                st.dataframe(
+                    subset_df[display_columns].style.format(precision=2),
+                    use_container_width=True,
+                    height=300,
+                )
+                csv_buffer = io.StringIO()
+                subset_df.to_csv(csv_buffer, index=False)
+                st.download_button(
+                    label=f"Download {subset} Data (CSV)",
+                    data=csv_buffer.getvalue(),
+                    file_name=f"correlation_data_{subset}_{position_filter}_{year_filter}.csv",
+                    mime="text/csv",
+                )
+elif section == "DO/Temp Relationship":
+    ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plot", value=ENABLE_ALTAIR)
+    st.title("DO/Temp Relationship")
+    st.markdown(
+        "This plot shows the relationship between dissolved oxygen and water temperature for all data."
+    )
+    if ENABLE_ALTAIR:
+        fig = altair_plot_do_temp_relationship(data["raw_df"])
+        st.altair_chart(fig, use_container_width=True)  # type: ignore
+    else:
+        fig = plot_do_temp_relationship(data["raw_df"])
+        st.pyplot(fig)
+elif section == "Calendar Heatmaps":
+    st.title("Calendar Heatmaps")
+    st.info(
+        "💡 You can customize the colormaps using the 'Plot Settings' expander in the sidebar."
+    )
+    raw_df = data["raw_df"]
+    raw_df["Date"] = pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.date
+    default_analytes = [
+        "Temperature, Water",
+        "Dissolved Oxygen",
+        "Salinity",
+        "pH",
+        "Turbidity",
+        "Depth, Secchi Disk Depth",
+        "Fecal Coliform (MPN)",
+        "Total Nitrogen",
+        "Total Phosphorus",
+        "Chlorophyll-uncorrected",
+    ]
+    # Get all unique analytes and ensure defaults are at the start of the list
+    all_analytes = default_analytes + [
+        x
+        for x in sorted(raw_df["Org_Analyte_Name"].unique())
+        if x not in default_analytes
+    ]
+    selected_analytes = st.sidebar.multiselect(
+        "Select Analytes:",
+        options=all_analytes,
+        default=default_analytes,
+        key="calendar_analyte_select",
+        help="Choose one or more analytes to display in the heatmap.",
+    )
+    # Filter Options
+    st.sidebar.markdown("### Filter Options")
+    sector_filter = st.sidebar.selectbox(
+        "Sector:",
+        ["All"] + sorted(raw_df["Sector"].unique().tolist()),
+        index=0,
+        key="calendar_sector_select",
+    )
+    position_filter = st.sidebar.selectbox(
+        "Position:",
+        ["All", "Surface", "Bottom"],
+        index=0,
+        key="calendar_position_select",
+    )
+    def format_colormap_option(option):
+        """
+        Label a colormap name with its family for display in the radio widget.
+        Names found in the table below get the matching family tag appended
+        (e.g. "RdBu [Diverging]"); any other value, such as "Default", gets an
+        empty suffix and is shown as-is.
+        """
+        family_tags = {
+            " [Sequential]": (
+                "viridis",
+                "plasma",
+                "inferno",
+                "magma",
+                "GnBu",
+                "Blues",
+                "Blues_r",
+            ),
+            " [Sequential (Multi-hue)]": ("YlOrRd", "YlGnBu", "RdPu"),
+            " [Diverging]": ("RdYlBu", "RdBu", "coolwarm"),
+        }
+        # First family that lists the option wins; unknown names get no tag
+        suffix = next(
+            (tag for tag, names in family_tags.items() if option in names), ""
+        )
+        return option + suffix
+    colormap_help_text = """
+    Any selection here will override the default color scheme for all of the displayed
+    heatmaps. Selecting Default will revert to the analyte-specific default color schemes.
+    **The default color schemes are:**
+    `Fecal Coliform (MPN)`          : `viridis`    _(blue-green-yellow)_
+    `Temperature, Water`            : `coolwarm`   _(red-white-blue)_
+    `Dissolved Oxygen`             : `RdYlBu`     _(red-yellow-blue)_
+    `Total Nitrogen/Phosphorus`    : `GnBu`       _(green-blue)_
+    `Depth, Secchi Disk Depth`     : `Blues_r`    _(reversed blues)_
+    `All other analytes`           : `Blues`      _(blue)_
+    """
+    with st.sidebar.expander("Plot Settings", expanded=False):
+        colormap = st.radio(
+            "Color Scheme",
+            options=[
+                "Default",
+                # Sequential (Perceptually Uniform)
+                "viridis",
+                "plasma",
+                "inferno",
+                "magma",
+                # Sequential (Single-hue)
+                "Blues",
+                "Blues_r",
+                # Sequential (Multi-hue)
+                "GnBu",
+                "YlOrRd",
+                "YlGnBu",
+                "RdPu",
+                # Diverging
+                "RdYlBu",
+                "RdBu",
+                "coolwarm",
+            ],
+            index=0,
+            help=colormap_help_text,
+            format_func=format_colormap_option,
+        )
+        if colormap == "Default":
+            colormap = None
+    # Narrow the data to the sidebar selections; "All" leaves a dimension unfiltered
+    plot_df = raw_df.copy()
+    if sector_filter != "All":
+        plot_df = plot_df[plot_df["Sector"] == sector_filter]
+    if position_filter != "All":
+        plot_df = plot_df[plot_df["Sample_Position"] == position_filter]
+    if not plot_df.empty:
+        # One heatmap per selected analyte, rendered in selection order
+        for analyte in selected_analytes:
+            fig = plot_calendar_heatmap(plot_df, analyte, colormap)
+            st.pyplot(fig)
+            # Release the figure once it has been rendered: a figure passed to
+            # st.pyplot explicitly is not cleared for us, so without this every
+            # rerun would leave one more open figure per analyte behind.
+            plt.close(fig)
+    else:
+        st.warning("No data available for the selected filters.")
+elif section == "Seasonal Trends":
+    st.title("Seasonal Trends")
+    raw_df = data["raw_df"]
+    years = sorted(pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.year.unique())
+    col1, col2 = st.columns(2)
+    with col1:
+        analyte = st.selectbox(
+            "Select Analyte:", ["Salinity"], index=0, key="seasonal_analyte_select"
+        )
+    with col2:
+        selected_year = st.selectbox(
+            "Select Year:",
+            sorted(years, reverse=True),
+            index=0,
+            key="seasonal_year_select",
+        )
+    if not raw_df.empty:
+        seasonal_data = load_seasonal_data(raw_df, analyte)
+        fig = generate_seasonal_plot(
+            seasonal_data,
+            str(selected_year),
+            shapefile_path="data/SAB/SAB.shp",
+        )
+        st.pyplot(fig)
+    else:
+        st.warning("No data available for seasonal analysis.")
+elif section == "Nutrient Ratios":
+    ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plots", value=ENABLE_ALTAIR)
+    st.title("Nutrient Ratios")
+    raw_df = data["raw_df"]
+    if not raw_df.empty:
+        if ENABLE_ALTAIR:
+            vconcat = altair_plot_np_ratios(raw_df)
+            st.altair_chart(vconcat, use_container_width=True)  # type: ignore
+        else:
+            fig = plot_np_ratios(raw_df)
+            st.pyplot(fig)
+    else:
+        st.warning("No data available for nutrient ratio analysis.")
+elif section == "Raw Data":
+    st.title("Raw Data")
+    raw_df = data["raw_df"]
+    raw_download_format = st.radio(
+        "Select download format:",
+        ["CSV", "Excel"],
+        key="raw_download",
+        horizontal=True,
+    )
+    # download_data is a (payload, file extension, MIME type) triple
+    if raw_download_format == "Excel":
+        # Build the workbook in memory so nothing is written to disk
+        excel_buffer = io.BytesIO()
+        with pd.ExcelWriter(excel_buffer, engine="xlsxwriter") as writer:
+            raw_df.to_excel(writer, sheet_name="Raw Water Quality Data", index=False)
+        raw_excel = excel_buffer.getvalue()
+        download_data = (
+            raw_excel,
+            "xlsx",
+            # .xlsx is Office Open XML; "application/vnd.ms-excel" is the
+            # legacy binary .xls type and mislabels this payload.
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        )
+    else:
+        download_data = data["downloads"]["raw"]["CSV"]
+    st.download_button(
+        label=f"Download Raw Data ({raw_download_format})",
+        data=download_data[0],
+        file_name=f"water_quality_raw_2019-01-01_-_2024-10-31.{download_data[1]}",
+        mime=download_data[2],
+    )
+    st.markdown(f"""
+    Preview of the first 1,000 of {raw_df.shape[0]:,} records in the dataset.
+    """)
+    st.dataframe(
+        raw_df.head(1000).style.format(precision=2),
+        use_container_width=True,
+        height=600,
+    )
+elif section == "Analytics":
+    st.title("Analytics")
+    log_file = Path("analytics.json")
+    if log_file.exists():
+        with open(log_file, "r") as f:
+            analytics_data = json.load(f)
+        # Build the visit table once; it feeds both the metrics column and the
+        # raw-data expander below.
+        visits_df = pd.DataFrame(analytics_data["visits"])
+        if not visits_df.empty:
+            visits_df["timestamp"] = pd.to_datetime(visits_df["timestamp"])
+        col1, col2 = st.columns(2)
+        with col1:
+            if visits_df.empty:
+                # A log with no visits has none of the columns used below
+                st.info("No visits have been recorded yet.")
+            else:
+                # Per-day unique visitors and total page views
+                daily_visits_df = (
+                    visits_df.groupby("date")["visitor_id"]
+                    .agg(["nunique", "count"])
+                    .reset_index()
+                    .rename(
+                        columns={"nunique": "Unique Visitors", "count": "Total Views"}
+                    )
+                )
+                daily_visits_df["date"] = pd.to_datetime(daily_visits_df["date"])
+                daily_visits_df = daily_visits_df.sort_values("date")
+                total_unique_visitors = visits_df["visitor_id"].nunique()
+                total_views = len(visits_df)
+                # nunique() ignores missing ids, so it can be 0 even with rows present
+                avg_views_per_visitor = (
+                    total_views / total_unique_visitors
+                    if total_unique_visitors
+                    else 0.0
+                )
+                st.subheader("Visitor Metrics")
+                metrics_col1, metrics_col2, metrics_col3 = st.columns(3)
+                metrics_col1.metric("Total Unique Visitors", total_unique_visitors)
+                metrics_col2.metric("Total Page Views", total_views)
+                metrics_col3.metric(
+                    "Avg Views per Visitor", f"{avg_views_per_visitor:.1f}"
+                )
+                st.subheader("Daily Statistics")
+                st.dataframe(
+                    daily_visits_df.style.format(
+                        {"Unique Visitors": "{:,.0f}", "Total Views": "{:,.0f}"}
+                    ),
+                    hide_index=True,
+                )
+        with col2:
+            section_visits_df = pd.DataFrame(
+                {
+                    "Section": analytics_data["section_counts"].keys(),
+                    "Views": analytics_data["section_counts"].values(),
+                }
+            )
+            section_visits_df = section_visits_df.sort_values("Views", ascending=True)
+            st.subheader("Total Section Views")
+            st.bar_chart(section_visits_df.set_index("Section"))
+        with st.expander("Raw Visit Data"):
+            st.dataframe(visits_df)
+    else:
+        st.warning("No analytics data available.")
+if ENABLE_TIMING:
+    st.markdown("---")
+    st.subheader("⚡ Performance Metrics")
+    # Keep only functions with at least one recorded duration: min()/max() and
+    # the mean are undefined for an empty list, and an empty summary table has
+    # no "Mean (seconds)" column to sort on.
+    timing_stats = {
+        func_name: durations
+        for func_name, durations in (
+            st.session_state.get("timing_stats") or {}
+        ).items()
+        if durations
+    }
+    if timing_stats:
+        st.markdown("#### Summary Statistics")
+        # One row per timed function: min, max, mean and number of calls
+        timing_summary = [
+            {
+                "Function": func_name,
+                "Min (seconds)": min(durations),
+                "Max (seconds)": max(durations),
+                "Mean (seconds)": sum(durations) / len(durations),
+                "Calls": len(durations),
+            }
+            for func_name, durations in timing_stats.items()
+        ]
+        timing_df = pd.DataFrame(timing_summary).sort_values(
+            "Mean (seconds)", ascending=False
+        )
+        st.dataframe(
+            timing_df.style.format(
+                {
+                    "Min (seconds)": "{:.2f}",
+                    "Max (seconds)": "{:.2f}",
+                    "Mean (seconds)": "{:.2f}",
+                    "Calls": "{:,.0f}",
+                }
+            ),
+            use_container_width=True,
+        )
+        st.markdown("#### Detailed Function Calls")
+        # Read defensively: the presence of timing_stats does not guarantee
+        # that timing_logs has been initialised as well.
+        timing_logs = st.session_state.get("timing_logs")
+        if timing_logs:
+            logs_df = pd.DataFrame(timing_logs)
+            logs_df["timestamp"] = pd.to_datetime(logs_df["timestamp"])
+            # Format parameters column if it exists
+            if "parameters" in logs_df.columns:
+                # Render dict parameters as "key: value" lines wrapped at 50
+                # characters; anything else is shown via str()
+                logs_df["parameters"] = logs_df["parameters"].apply(
+                    lambda x: (
+                        "\n".join(
+                            textwrap.wrap(
+                                "\n".join(f"{k}: {v}" for k, v in x.items()),
+                                width=50,
+                                break_long_words=False,
+                                replace_whitespace=False,
+                            )
+                        )
+                        if isinstance(x, dict)
+                        else str(x)
+                    )
+                )
+            # Most recent calls first
+            logs_df = logs_df.sort_values("timestamp", ascending=False)
+            st.dataframe(
+                logs_df.style.format(
+                    {
+                        "duration": "{:.2f} seconds",
+                        # Trim microseconds down to milliseconds
+                        "timestamp": lambda x: x.strftime("%H:%M:%S.%f")[:-3],
+                    }
+                ),
+                use_container_width=True,
+                height=400,
+                column_config={
+                    "parameters": st.column_config.TextColumn(
+                        "parameters",
+                        width="large",
+                        help="Function parameters and their values",
+                    )
+                },
+            )
+    else:
+        st.info("No timing statistics available yet. Try refreshing the page.")

data/SAB/SAB.cpg ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ad3031f5503a4404af825262ee8232cc04d4ea6683d42c5dd0a2f2a27ac9824
+size 5

data/SAB/SAB.prj ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0360a15fcf4a096367d80e8c723d6dde12e82e4b05b906398443ddfc6a17b6cb
+size 454

data/SAB/SAB.qmd ADDED Viewed

	@@ -0,0 +1,27 @@

+<!DOCTYPE qgis PUBLIC 'http://mrcc.com/qgis.dtd' 'SYSTEM'>
+<qgis version="3.40.0-Bratislava">
+  <identifier></identifier>
+  <parentidentifier></parentidentifier>
+  <language></language>
+  <type>dataset</type>
+  <title></title>
+  <abstract></abstract>
+  <links/>
+  <dates/>
+  <fees></fees>
+  <encoding></encoding>
+  <crs>
+    <spatialrefsys nativeFormat="Wkt">
+      <wkt>PROJCRS["NAD83(2011) / Florida GDL Albers",BASEGEOGCRS["NAD83(2011)",DATUM["NAD83 (National Spatial Reference System 2011)",ELLIPSOID["GRS 1980",6378137,298.257222101,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",6318]],CONVERSION["Florida GDL Albers (meters)",METHOD["Albers Equal Area",ID["EPSG",9822]],PARAMETER["Latitude of false origin",24,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",-84,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",24,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",31.5,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",400000,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["easting (X)",east,ORDER[1],LENGTHUNIT["metre",1]],AXIS["northing (Y)",north,ORDER[2],LENGTHUNIT["metre",1]],USAGE[SCOPE["State-wide spatial data management."],AREA["United States (USA) - Florida."],BBOX[24.41,-87.63,31.01,-79.97]],ID["EPSG",6439]]</wkt>
+      <proj4>+proj=aea +lat_0=24 +lon_0=-84 +lat_1=24 +lat_2=31.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs</proj4>
+      <srsid>28506</srsid>
+      <srid>6439</srid>
+      <authid>EPSG:6439</authid>
+      <description>NAD83(2011) / Florida GDL Albers</description>
+      <projectionacronym>aea</projectionacronym>
+      <ellipsoidacronym>EPSG:7019</ellipsoidacronym>
+      <geographicflag>false</geographicflag>
+    </spatialrefsys>
+  </crs>
+  <extent/>
+</qgis>

main.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import pandas as pd
+def get_raw_data(file_path: str):
+    """
+    Load the raw water-quality table from a Parquet or CSV file.
+    Paths ending in ".parquet" are passed straight to ``pd.read_parquet``.
+    Any other path is read as CSV: ``Station_Number`` is kept as a string, the
+    descriptive columns listed below are read as categoricals,
+    ``Org_Result_Value`` is coerced to numeric (entries that cannot be parsed,
+    such as "Not Reported", become NaN) and ``Activity_Start_Date_Time`` is
+    parsed to datetimes.
+    """
+    is_parquet = file_path.endswith(".parquet")
+    if is_parquet:
+        return pd.read_parquet(file_path)
+    # Station numbers stay textual; everything else listed here is categorical
+    column_types = {"Station_Number": str}
+    for name in (
+        "Monitoring_Location_ID",
+        "Activity_Depth_Unit",
+        "Sample_Position",
+        "Time_Zone",
+        "Activity_Type",
+        "Waterbody_Class",
+        "WBID",
+        "Name",
+        "Sector",
+        "Total_Depth_Unit",
+        "Org_Analyte_Name",
+    ):
+        column_types[name] = "category"
+    frame = pd.read_csv(file_path, dtype=column_types)
+    reported = frame["Org_Result_Value"].replace("Not Reported", pd.NA)
+    timestamps = pd.to_datetime(frame["Activity_Start_Date_Time"])
+    return frame.assign(
+        Org_Result_Value=pd.to_numeric(reported, errors="coerce"),
+        Activity_Start_Date_Time=timestamps,
+    )
+def get_stations_data() -> pd.DataFrame:
+    """
+    Load the station locations table from ``data/Stations-Locations.csv``.
+    The path is relative, so it is resolved against the current working directory.
+    """
+    stations_csv = "data/Stations-Locations.csv"
+    return pd.read_csv(stations_csv)
+def add_lat_long(raw_df: pd.DataFrame, stations_df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Add latitude and longitude to raw data based on station number.
+    The join key is ``Station_Number`` converted to float, matched against the
+    ``Number`` column of the stations table. It is a left join, so rows whose
+    station is not listed keep NaN coordinates.
+    Args:
+        raw_df (pd.DataFrame): Raw data with a Station_Number column; left unmodified
+        stations_df (pd.DataFrame): Stations table with Number, Latitude and Longitude columns
+    Returns:
+        pd.DataFrame: A new frame holding raw_df's rows plus Latitude and Longitude
+    Raises:
+        ValueError: If a Station_Number value cannot be converted to float
+    """
+    coordinates = stations_df[["Number", "Latitude", "Longitude"]]
+    # assign() works on a copy, so the temporary join key never leaks into the
+    # caller's DataFrame.
+    keyed = raw_df.assign(Number=raw_df["Station_Number"].astype(float))
+    merged = keyed.merge(coordinates, on="Number", how="left")
+    return merged.drop(columns="Number")
+def get_analyte_data_with_lat_long(df: pd.DataFrame, analyte: str) -> pd.DataFrame:
+    """
+    Extract and transform data for a specific analyte, adding geographical coordinates.
+    This function processes raw water quality data by:
+    1. Adding latitude/longitude coordinates from stations data
+    2. Filtering for a specific analyte
+    3. Removing rows whose Org_Result_Value is missing
+    4. Aggregating duplicate measurements using mean values
+    Args:
+        df (pd.DataFrame): Raw water quality data containing at minimum these columns:
+            - Station_Number
+            - Org_Analyte_Name
+            - Org_Result_Value
+            plus every column used as a grouping key below
+        analyte (str): Name of the analyte to filter for (e.g., "Temperature, Water")
+    Returns:
+        pd.DataFrame: Processed dataframe with columns:
+            - Activity_Start_Date_Time: Timestamp of measurement
+            - Station_Number: Monitoring station identifier
+            - Sector: Geographical sector
+            - WBID: Waterbody ID
+            - Sample_Position: Position of sample (e.g., "Surface", "Bottom")
+            - Activity_Depth: Depth of measurement
+            - Latitude: Station latitude
+            - Longitude: Station longitude
+            - {analyte}: Measured value for the specified analyte
+    Note:
+        Duplicate measurements at the same location and time are averaged.
+        NOTE(review): rows with a missing value in any grouping key (for example
+        a station without coordinates) are presumably dropped by the pivot - confirm.
+    """
+    return (
+        df.pipe(add_lat_long, get_stations_data())
+        # Reference the variable instead of pasting it into the expression, so
+        # analyte names containing quote characters cannot break the query.
+        .query("Org_Analyte_Name == @analyte")
+        .dropna(subset=["Org_Result_Value"])
+        .pivot_table(
+            index=[
+                "Activity_Start_Date_Time",
+                "Station_Number",
+                "Sector",
+                "WBID",
+                "Sample_Position",
+                "Activity_Depth",
+                "Latitude",
+                "Longitude",
+            ],
+            values="Org_Result_Value",
+            aggfunc="mean",
+            observed=True,
+        )
+        .reset_index()
+        .rename(columns={"Org_Result_Value": analyte})
+    )
+def create_station_stats(
+    pivoted: pd.DataFrame, station: str | float | int
+) -> pd.DataFrame:
+    """
+    Build the statistics table for a single station.
+    Args:
+        pivoted: Frame indexed by (station, analyte) whose columns are keyed by
+            (aggregation, sample position)
+        station: Station identifier to look up in the row index
+    Returns:
+        DataFrame with one row per statistic (Average, Maximum, Minimum, n=) and
+        one column per reported parameter/position pair
+    Raises:
+        KeyError: If the station or a required analyte is missing from pivoted
+    """
+    # Display label -> (analyte name in the data, positions to report)
+    parameters = {
+        "Secchi Depth (feet)": ("Depth, Secchi Disk Depth", ["Surface"]),
+        "Temperature (°C)": ("Temperature, Water", ["Surface", "Bottom"]),
+        "Dissolved Oxygen (mg/L)": ("Dissolved Oxygen", ["Surface", "Bottom"]),
+        "Turbidity (NTU)": ("Turbidity", ["Surface", "Bottom"]),
+        "Salinity (ppt)": ("Salinity", ["Surface", "Bottom"]),
+        "pH": ("pH", ["Surface", "Bottom"]),
+    }
+    # Row label -> aggregation key in pivoted's columns
+    stat_keys = {"Average": "mean", "Maximum": "max", "Minimum": "min", "n=": "count"}
+    table = {"Station": station, "Statistic": list(stat_keys)}
+    for label, (analyte_name, positions) in parameters.items():
+        # The position suffix is only added when a parameter has several positions
+        needs_suffix = len(positions) > 1
+        for position in positions:
+            header = f"{label} {position}" if needs_suffix else label
+            table[header] = [
+                pivoted[agg][position][station, analyte_name]
+                for agg in stat_keys.values()
+            ]
+    return pd.DataFrame(table)
+def create_overall_summary(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarise every result per analyte across the whole dataset.
+    Args:
+        df (pd.DataFrame): Long-format results with Org_Analyte_Name and Org_Result_Value columns
+    Returns:
+        pd.DataFrame: Rows Mean/Maximum/Minimum/Count (rounded to 2 decimals) and
+        one column for each of the six core parameters, in display order
+    Raises:
+        KeyError: If any of the six core parameters has no column in the summary
+    """
+    stat_labels = {"mean": "Mean", "max": "Maximum", "min": "Minimum", "count": "Count"}
+    analyte_labels = {
+        "Depth, Secchi Disk Depth": "Secchi Depth (feet)",
+        "Dissolved Oxygen": "Dissolved Oxygen (mg/L)",
+        "Salinity": "Salinity (ppt)",
+        "Turbidity": "Turbidity (NTU)",
+        "Temperature, Water": "Temperature (°C)",
+    }
+    display_order = [
+        "Secchi Depth (feet)",
+        "Temperature (°C)",
+        "Dissolved Oxygen (mg/L)",
+        "Turbidity (NTU)",
+        "Salinity (ppt)",
+        "pH",
+    ]
+    grouped = df.groupby(["Org_Analyte_Name"], observed=False)["Org_Result_Value"]
+    per_analyte = grouped.agg(list(stat_labels)).round(2).rename(columns=stat_labels)
+    # Drop the index name so it does not show up as a header after transposing
+    per_analyte.index.name = None
+    by_statistic = per_analyte.T.rename(columns=analyte_labels)
+    return by_statistic.loc[:, display_order]
+def create_summary_by_station_and_position(
+    df: pd.DataFrame, exclude_analytes: list[str] | None = None
+) -> pd.DataFrame:
+    """
+    Summarise results per station, split by sample position.
+    Args:
+        df (pd.DataFrame): Long-format results with Station_Number, Sample_Position,
+            Org_Analyte_Name and Org_Result_Value columns
+        exclude_analytes (list[str] | None): Analyte names to leave out of the
+            summary; None excludes nothing
+    Returns:
+        pd.DataFrame: The per-station statistics tables stacked together and
+        indexed by (Station, Statistic)
+    """
+    exclude_analytes = [] if exclude_analytes is None else exclude_analytes
+    stat_names = ["mean", "max", "min", "count"]
+    group_keys = ["Station_Number", "Sample_Position", "Org_Analyte_Name"]
+    kept = df.query("Org_Analyte_Name not in @exclude_analytes")
+    summary = (
+        kept.groupby(group_keys, observed=False)["Org_Result_Value"]
+        .agg(stat_names)
+        .round(2)
+    )
+    # Move sample position from the rows into the columns, one column per
+    # (statistic, position) pair
+    pivoted = summary.reset_index().pivot_table(
+        index=["Station_Number", "Org_Analyte_Name"],
+        columns=["Sample_Position"],
+        values=stat_names,
+        observed=False,
+    )
+    per_station = [
+        create_station_stats(pivoted, station)
+        for station in sorted(df["Station_Number"].unique())
+    ]
+    return pd.concat(per_station).set_index(["Station", "Statistic"])
+def create_multiindex_columns(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Return a copy of df whose flat column labels become (Analyte, Position) pairs.
+    A label is split at its last space, e.g. "pH Surface" -> ("pH", "Surface").
+    "Secchi Depth (feet)" and any label without a space carry no position
+    suffix, so they are kept whole under an empty analyte level.
+    Args:
+        df (pd.DataFrame): Frame with string column labels; left unmodified
+    Returns:
+        pd.DataFrame: Copy of df with two-level columns named Analyte and Position
+    """
+    def _split_label(col: str) -> tuple[str, str]:
+        # rpartition yields an empty separator when the label has no space,
+        # which previously surfaced as an IndexError.
+        analyte, separator, position = col.rpartition(" ")
+        if col == "Secchi Depth (feet)" or not separator:
+            return ("", col)
+        return (analyte, position)
+    new_df = df.copy()
+    new_df.columns = pd.MultiIndex.from_tuples(
+        [_split_label(col) for col in df.columns],
+        names=["Analyte", "Position"],
+    )
+    return new_df

pyproject.toml ADDED Viewed

	@@ -0,0 +1,32 @@

+[project]
+name = "state-of-the-bay"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "contextily>=1.6.2",
+    "folium>=0.18.0",
+    "geopandas[all]>=1.0.1",
+    "great-tables>=0.13.0",
+    "ipykernel>=6.29.5",
+    "matplotlib>=3.9.2",
+    "nbformat>=5.10.4",
+    "osmnx>=1.9.3",
+    "pandas>=2.2.3",
+    "plotly>=5.24.1",
+    "plotnine>=0.14.1",
+    "polars>=1.12.0",
+    "pygwalker>=0.4.9.13",
+    "pytest>=8.3.3",
+    "scipy>=1.14.1",
+    "seaborn>=0.13.2",
+    "streamlit>=1.40.0",
+    "watchdog>=5.0.3",
+    "xlsxwriter>=3.2.0",
+]
+[tool.uv]
+dev-dependencies = [
+    "ipykernel>=6.29.5",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+streamlit
+pandas
+xlsxwriter
+numpy
+scipy
+matplotlib
+plotly
+great-tables
+polars
+seaborn
+geopandas[all]
+contextily
+plotly-express
+altair

tests/test_main.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import numpy as np
+import pandas as pd
+import pytest
+from main import create_station_stats
+@pytest.fixture
+def sample_pivoted_data():
+    """Build a pivoted frame shaped like the input create_station_stats expects"""
+    station_numbers = ["1.00", "3.20"]
+    analyte_names = [
+        "Depth, Secchi Disk Depth",
+        "Temperature, Water",
+        "Dissolved Oxygen",
+        "Turbidity",
+        "Salinity",
+        "pH",
+    ]
+    aggregations = ["count", "max", "mean", "min"]
+    positions = ["Bottom", "Surface"]
+    # Rows: every (station, analyte) pair
+    row_index = pd.MultiIndex.from_product(
+        [station_numbers, analyte_names],
+        names=["Station_Number", "Org_Analyte_Name"],
+    )
+    # Columns: every (aggregation, position) pair
+    column_index = pd.MultiIndex.from_product([aggregations, positions])
+    # Every statistic starts out as 10.0 ...
+    values = np.full((len(row_index), len(column_index)), 10.0)
+    frame = pd.DataFrame(values, index=row_index, columns=column_index)
+    # ... and the counts are then overwritten with 100
+    for position in positions:
+        frame.loc[:, ("count", position)] = 100
+    return frame
+def test_create_station_stats_basic(sample_pivoted_data):
+    """The result is a four-row DataFrame carrying the identifying columns"""
+    stats = create_station_stats(sample_pivoted_data, "3.20")
+    assert isinstance(stats, pd.DataFrame)
+    # One row per statistic: Average, Maximum, Minimum, n=
+    assert len(stats) == 4
+    for required in ("Station", "Statistic"):
+        assert required in stats.columns
+def test_create_station_stats_values(sample_pivoted_data):
+    """Values are carried over from the pivoted data in statistic order"""
+    stats = create_station_stats(sample_pivoted_data, "3.20")
+    # Dissolved Oxygen at the surface, listed as mean, max, min, count
+    expected = [10.0, 10.0, 10.0, 100]
+    assert stats["Dissolved Oxygen (mg/L) Surface"].tolist() == expected
+def test_create_station_stats_columns(sample_pivoted_data):
+    """The result has exactly the expected set of columns"""
+    stats = create_station_stats(sample_pivoted_data, "3.20")
+    # Parameters reported at both positions get one column per position
+    two_position_parameters = [
+        "Temperature (°C)",
+        "Dissolved Oxygen (mg/L)",
+        "Turbidity (NTU)",
+        "Salinity (ppt)",
+        "pH",
+    ]
+    expected_columns = {"Station", "Statistic", "Secchi Depth (feet)"} | {
+        f"{parameter} {position}"
+        for parameter in two_position_parameters
+        for position in ("Surface", "Bottom")
+    }
+    assert set(stats.columns) == expected_columns
+def test_create_station_stats_missing_data(sample_pivoted_data):
+    """A NaN in the pivoted data is passed through to the result"""
+    station = "3.20"
+    # Blank out the surface pH mean for this station
+    missing_cell = ((station, "pH"), ("mean", "Surface"))
+    sample_pivoted_data.loc[missing_cell[0], missing_cell[1]] = np.nan
+    stats = create_station_stats(sample_pivoted_data, station)
+    # Row 0 is the "Average" statistic
+    assert pd.isna(stats["pH Surface"][0])
+def test_create_station_stats_statistics(sample_pivoted_data):
+    """The Statistic column lists the row labels in a fixed order"""
+    stats = create_station_stats(sample_pivoted_data, "3.20")
+    assert stats["Statistic"].tolist() == ["Average", "Maximum", "Minimum", "n="]
+def test_create_station_stats_invalid_station(sample_pivoted_data):
+    """A station missing from the pivoted data raises KeyError"""
+    unknown_station = "invalid_station"
+    with pytest.raises(KeyError):
+        create_station_stats(sample_pivoted_data, unknown_station)

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff