waterdb

Sleeping

App Files Files Community

github-actions[bot] committed on Nov 18, 2024

Commit

39e6d5e

1 Parent(s): bb82b34

Add all files with LFS support

Browse files

Files changed (5) hide show

analysis.py +132 -0
data/master_data_file_2010-01-01_-_2024-10-31.parquet +0 -0
main.py +14 -12
pages.py +17 -8
utils/data_loading.py +1 -1

analysis.py CHANGED Viewed

@@ -1254,6 +1254,138 @@ def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
     return g.figure
 def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
     """
     Create an interactive scatter plot of DO vs temperature with regression lines using Altair.

     return g.figure
+def plotly_plot_do_temp_relationship(df: pd.DataFrame) -> go.Figure:
+    """
+    Create an interactive scatter plot of DO vs temperature with regression lines using Plotly.
+    Intended as the Plotly counterpart of the matplotlib/seaborn version of this plot.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        Input dataframe containing DO and temperature measurements. The columns
+        read here are "Org_Analyte_Name", "Org_Result_Value",
+        "Activity_Start_Date_Time", "Station_Number", "Sample_Position" and
+        "Sector".
+    Returns:
+    --------
+    go.Figure
+        Plotly figure object with one marker trace per sample position
+        ("Surface", "Bottom"), a dashed linear trend line for every position
+        that has at least two distinct temperatures, and a dotted horizontal
+        reference line at 5 mg/L.
+    """
+    # Pivot the long-format results so that each sampling event (time, station,
+    # position, sector) becomes one row with a DO column and a temperature
+    # column; events missing either measurement are dropped.
+    do_temp_data = (
+        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
+        .pivot_table(
+            index=[
+                "Activity_Start_Date_Time",
+                "Station_Number",
+                "Sample_Position",
+                "Sector",  # Added for tooltip
+            ],
+            columns="Org_Analyte_Name",
+            values="Org_Result_Value",
+            observed=True,
+        )
+        .reset_index()
+        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
+    )
+    # Create figure
+    fig = go.Figure()
+    # Fixed color per sample position so that the markers and the trend line
+    # of a position share the same color
+    colors = {"Surface": "#8da0cb", "Bottom": "#fc8d62"}
+    # Add scatter plots and regression lines for each position
+    for position in ["Surface", "Bottom"]:
+        pos_data = do_temp_data[do_temp_data["Sample_Position"] == position]
+        # Add scatter plot (kept even when empty so the legend stays stable)
+        fig.add_trace(
+            go.Scatter(
+                x=pos_data["Temperature, Water"],
+                y=pos_data["Dissolved Oxygen"],
+                mode="markers",
+                name=position,
+                marker=dict(color=colors[position], size=8, opacity=0.6),
+                hovertemplate=(
+                    "Temperature: %{x:.1f}°C<br>"
+                    "DO: %{y:.1f} mg/L<br>"
+                    "Position: " + position + "<br>"
+                    "Station: %{customdata[0]}<br>"
+                    "Sector: %{customdata[1]}<br>"
+                    "<extra></extra>"
+                ),
+                customdata=pos_data[["Station_Number", "Sector"]],
+            )
+        )
+        # A straight-line fit needs at least two distinct temperatures:
+        # np.polyfit raises on an empty vector and the fit is degenerate
+        # when every x value is the same, so skip the trend line then.
+        if pos_data["Temperature, Water"].nunique() < 2:
+            continue
+        # Calculate and add regression line (degree-1 least-squares fit)
+        z = np.polyfit(pos_data["Temperature, Water"], pos_data["Dissolved Oxygen"], 1)
+        p = np.poly1d(z)
+        x_range = np.linspace(
+            pos_data["Temperature, Water"].min(),
+            pos_data["Temperature, Water"].max(),
+            100,
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=x_range,
+                y=p(x_range),
+                mode="lines",
+                line=dict(color=colors[position], dash="dash"),
+                name=f"{position} Trend",
+                # Trend lines are decoration only: no hover and no legend entry
+                hovertemplate=None,
+                hoverinfo="skip",
+                showlegend=False,
+            )
+        )
+    # Add DO threshold line
+    fig.add_hline(
+        y=5,
+        line=dict(color="red", width=1, dash="dot"),
+        opacity=0.5,
+        annotation_text="5 mg/L DO threshold",
+        annotation_position="left",
+        annotation=dict(
+            font=dict(color="red", size=12),
+            xanchor="left",
+            yanchor="bottom",
+            opacity=0.5,
+        ),
+    )
+    # Update layout
+    fig.update_layout(
+        title=dict(
+            text="Dissolved Oxygen vs Water Temperature",
+            x=0.5,
+            y=0.95,
+            xanchor="center",
+            yanchor="top",
+            font=dict(size=16),
+        ),
+        xaxis_title="Water Temperature (°C)",
+        yaxis_title="Dissolved Oxygen (mg/L)",
+        legend_title="Sample Position",
+        # Place the legend just outside the right edge of the plotting area
+        legend=dict(
+            yanchor="top",
+            y=1,
+            xanchor="left",
+            x=1.05,
+        ),
+        template="plotly_white",
+        width=800,
+        height=600,
+        showlegend=True,
+    )
+    # Update axes
+    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
+    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
+    return fig
 def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
     """
     Create an interactive scatter plot of DO vs temperature with regression lines using Altair.

data/master_data_file_2010-01-01_-_2024-10-31.parquet ADDED Viewed

Binary file (493 kB). View file

main.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import pandas as pd
-def get_raw_data(file_path: str):
     """
-    Read raw data from a CSV or Parquet file.
     """
-    if file_path.endswith(".parquet"):
-        return pd.read_parquet(file_path)
     categorical_columns = [
         "Monitoring_Location_ID",
         "Activity_Depth_Unit",
@@ -27,11 +25,15 @@ def get_raw_data(file_path: str):
         **{col: "category" for col in categorical_columns},
     }
-    return pd.read_csv(file_path, dtype=dtype_dict).assign(
-        Org_Result_Value=lambda df: pd.to_numeric(
-            df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
-        ),
-        Activity_Start_Date_Time=lambda df: pd.to_datetime(
-            df["Activity_Start_Date_Time"]
-        ),
     )

 import pandas as pd
+def master_data_csv_to_parquet(file_path: str):
     """
+    Convert master data export from a CSV to a Parquet file.
     """
+    save_path = file_path.replace(".csv", ".parquet")
     categorical_columns = [
         "Monitoring_Location_ID",
         "Activity_Depth_Unit",
         **{col: "category" for col in categorical_columns},
     }
+    return (
+        pd.read_csv(file_path, dtype=dtype_dict)
+        .assign(
+            Org_Result_Value=lambda df: pd.to_numeric(
+                df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
+            ),
+            Activity_Start_Date_Time=lambda df: pd.to_datetime(
+                df["Activity_Start_Date_Time"]
+            ),
+        )
+        .to_parquet(save_path)
     )

pages.py CHANGED Viewed

@@ -7,7 +7,6 @@ import pandas as pd
 import streamlit as st
 from analysis import (
-    altair_plot_do_temp_relationship,
     altair_plot_np_ratios,
     altair_plot_sector_trends,
     generate_seasonal_plot,
@@ -16,6 +15,7 @@ from analysis import (
     plot_np_ratios,
     plot_sector_trends,
     plot_trends_by_station,
 )
 from components import (
     get_reporting_year_info_message,
@@ -187,18 +187,24 @@ def home_section():
         stat_col1, stat_col2 = st.columns(2)
         with stat_col1:
-            st.metric("Active Stations", len(stations_df))
             st.metric("Sectors", len(stations_df["Sector"].unique()))
-            st.metric("Waterbody IDs", len(stations_df["WBID"].unique()))
         with stat_col2:
-            st.metric("Total Samples", f"{int(stations_df['Total_Samples'].sum()):,}")
     with map_col1:
         render_stations_map(stations_df)
     # Add stations table
-    st.markdown("### Stations Details")
     # Create a simplified view of the stations data
     display_columns = [
@@ -289,12 +295,15 @@ def home_section():
     csv_buffer = io.StringIO()
     stations_table.to_csv(csv_buffer, index=False)
     st.download_button(
-        label="Download Stations Data (CSV)",
         data=csv_buffer.getvalue(),
         file_name="monitoring_stations.csv",
         mime="text/csv",
     )
     # Summary Section
     st.markdown("## Data Summary")
     tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
@@ -347,8 +356,8 @@ def do_temp_relationship_section():
         "This plot shows the relationship between dissolved oxygen and water temperature for all data."
     )
     if st.session_state.ENABLE_ALTAIR:
-        fig = altair_plot_do_temp_relationship(st.session_state.data["raw_df"])
-        st.altair_chart(fig, use_container_width=True)  # type: ignore
     else:
         fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
         st.pyplot(fig)

 import streamlit as st
 from analysis import (
     altair_plot_np_ratios,
     altair_plot_sector_trends,
     generate_seasonal_plot,
     plot_np_ratios,
     plot_sector_trends,
     plot_trends_by_station,
+    plotly_plot_do_temp_relationship,
 )
 from components import (
     get_reporting_year_info_message,
         stat_col1, stat_col2 = st.columns(2)
         with stat_col1:
+            # Calculate active stations (sampled within last 12 months)
+            today = pd.Timestamp.today()
+            active_mask = pd.to_datetime(stations_df["Most_Recent_Sample"]) > (
+                today - pd.DateOffset(months=12)
+            )
+            st.metric("Active Stations", active_mask.sum())
             st.metric("Sectors", len(stations_df["Sector"].unique()))
+            st.metric("Total Samples", f"{int(stations_df['Total_Samples'].sum()):,}")
         with stat_col2:
+            st.metric("Total Stations", len(stations_df))
+            st.metric("Waterbody IDs", len(stations_df["WBID"].unique()))
     with map_col1:
         render_stations_map(stations_df)
     # Add stations table
+    st.markdown("### Station Details")
     # Create a simplified view of the stations data
     display_columns = [
     csv_buffer = io.StringIO()
     stations_table.to_csv(csv_buffer, index=False)
     st.download_button(
+        label="Download Station Details",
         data=csv_buffer.getvalue(),
         file_name="monitoring_stations.csv",
         mime="text/csv",
     )
+@log_page_visit()
+def data_summary_section():
     # Summary Section
     st.markdown("## Data Summary")
     tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
         "This plot shows the relationship between dissolved oxygen and water temperature for all data."
     )
     if st.session_state.ENABLE_ALTAIR:
+        fig = plotly_plot_do_temp_relationship(st.session_state.data["raw_df"])
+        st.plotly_chart(fig, use_container_width=True)  # type: ignore
     else:
         fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
         st.pyplot(fig)

utils/data_loading.py CHANGED Viewed

@@ -123,7 +123,7 @@ def load_data(
         end_date: Optional end date filter
         reporting_month: Optional reporting month filter
     """
-    raw_df = get_raw_data("data/master_data_file_2019-01-01_-_2024-10-31.parquet")
     # Get full dataset date range for the date input controls
     full_dataset_metadata = get_dataset_metadata(raw_df)

         end_date: Optional end date filter
         reporting_month: Optional reporting month filter
     """
+    raw_df = get_raw_data("data/master_data_file_2010-01-01_-_2024-10-31.parquet")
     # Get full dataset date range for the date input controls
     full_dataset_metadata = get_dataset_metadata(raw_df)